/**********************************************************************
DO FILE SUMMARY

This files cleans the HPACC dataset for our statin CVD prevention analysis,
conducts the analysis, and generates figures/tables for main text and appendices.

Maja E. Marcus, Sebastian Vollmer, David Flood, for HPACC collaborators

October 3, 2021
**********************************************************************/

clear
cls
version 16	
set more off
capture log close
set showbaselevels
 
* Set graph options and scheme
set scheme s1color
graph set window fontface "Arial"

* Loading original HPACC dataset
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/2021_09_23 - Appended dataset.dta"

* Appending additional files cleaned for this analysis
drop if country == "Fiji" // the appended file has the new Fiji steps survey
append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/pooled_new_surveys - 2021.04.28.dta", nolabel

* Use the macro $S_DATE to save files with current date
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")
log using "Log files/log_${date}.log", replace

////////////////////////////////////////////////////////////////////////////////
/////////////////// GENERAL DATASET CLEANING AND PREPARATION ///////////////////
////////////////////////////////////////////////////////////////////////////////

* Keep variables used here
keep country countryGDPclass WHOregionclass Population2015  /// harmonization variables
	p_id stratum_num stratum year psu_num psu svy rural  /// survey design variables
	weq_fbg wpop_fbg w_all w_fbg /// weight variables 
	age sex pregnant educat3 wealth_quintile  /// demographic
	statin aspirin mi csmoke hypt_new hbg_new rural ///
	sbp_avg tchol_mgdl ldl_mgdl bmi clin_dia /// variables needed to generate WHO lab risk scores
	sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 ht wt ///
	insulin_new 
	
drop if country == "India" // India hogs memory and is not included, removing early

* Randomly restricts dataset to subset of observations to increase speed while cleaning and building code
* gen random_number = runiform()
* drop if random_number > 0.2
* drop if random_number > 0.2 & inlist(country,"Iran","Mexico")

* Clean missingness ------------------------------------------------------------

	/* See rows 517-520 of codebook v3 for the following:
	
	666666666 = variable not included in the dataset
	777777777 = subject did not know
	888888888 = subject refused to answer
	999999999 = missing due to skip pattern 	
	
	I make the decision to update the codebook as follows:
	
	.n = variable not in dataset
	.m = true missing
	0 = skipped	*/	

	foreach v of varlist statin mi rural statin {
		replace `v' = . if inlist(`v',555555555)  	// 555555555 = not eligible
		replace `v' = . if inlist(`v',666666666, 666666688)  	// 666666666 = variable not in the dataset coded as .
		replace `v' = 0 if inlist(`v',777777777,77,777777792,.d)  			// 777777777 = subject did not know
		replace `v' = .m if inlist(`v',855555554.7,855555571.2,888888888,888888896,888888896,.r)		// 888888888 = subject refused to answer
		replace `v' = 0 if inlist(`v',977777785.6,977777776.8,999999999,1000000000)  	// 999999999 = missing as skip pattern recoded as 0	
	}		
		
* Assessing and dropping countries ---------------------------------------------	
	
* Changing name of Swaziland
	replace country = "Eswatini" if country == "Swaziland"
	
* Dropping countries without statin and mi	
	drop if country == "Albania"
	* drop if country == "Armenia" // response rate <50%
	drop if country == "Belize"
	drop if country == "Brazil"
	drop if country == "Cabo Verde"
	drop if country == "Cambodia"
	drop if country == "Chile"
	drop if country == "China"
	drop if country == "Comoros"
	drop if country == "Costa Rica"
	drop if country == "Egypt"	
	drop if country == "El Salvador"
	drop if country == "Eritrea"
	drop if country == "Fiji"
	drop if country == "Fiji 2011"
	drop if country == "Gambia"
	drop if country == "Ghana"
	drop if country == "Grenada"
	drop if country == "India"
	drop if country == "Indonesia"
	drop if country == "Kazakhstan"
	drop if country == "Laos"
	drop if country == "Lesotho"
	drop if country == "Liberia"
	drop if country == "Libya"
	drop if country == "Malawi"
	drop if country == "Marshall Islands"
	drop if country == "Mexico" // Using ENSANUT 2018 instead of default MXFLS
	drop if country == "Mongolia" // Using Mongolia STEPS 2019 update
	drop if country == "Mozambique"
	drop if country == "Namibia"
	* drop if country == "Nauru" // Is it an LMIC?
	drop if country == "Niger"
	drop if country == "Peru"
	drop if country == "Romania" // Statin variable available, but mi and aspirin variables not shared with HPACC
	drop if country == "Russian Federation"
	drop if country == "Rwanda"
	drop if country == "Samoa"
	drop if country == "Sao Tome and Principe"
	drop if country == "Seychelles"
	drop if country == "Sierra Leone"
	drop if country == "South Africa"
	drop if country == "South Africa DHS"
	drop if country == "Tanzania"
	drop if country == "Togo"
	drop if country == "Tonga"
	drop if country == "Ukraine"
	drop if country == "Vanuatu"
	drop if country == "Zanzibar"
	
* Adjusting World Bank fiscal groups after careful review of World Bank fiscal year income groups
	* https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups
replace countryGDPclass	= 1 if country == "Kenya"
replace countryGDPclass	= 2 if country == "Guyana"
replace countryGDPclass	= 1 if country == "Kyrgyzstan"
replace countryGDPclass	= 1 if country == "Myanmar"
replace countryGDPclass	= 1 if country == "Nepal"
replace countryGDPclass	= 3 if country == "Nauru"
	
* Replacing country names
replace country = "Mexico" if country == "Mexico ENSANUT" 	// Using ENSANUT 2018 instead of default MXFLS
replace country = "Mongolia" if country == "Mongolia 2019" 	// Using Mongolia STEPS 2019 update
		
* Generate a country_id variable (i.e., an id within each country)
tab country	
bysort country: gen country_id=(_n) 
label variable country_id "Within country participant number"
sort country
list country svy year if country_id == 1 // looking at country, svy type, and year in a nice list

* Encoded countries	
encode country, gen(country_encoded)	

* Clarify how many countries are in the dataset
distinct country_encoded
scalar n_countries = r(ndistinct)	

* Replacing survey year to year of data collection after meticulous check:
clonevar survey_year = year
replace survey_year = "2016-17" if country == "Algeria"
replace survey_year = "2018-19" if country == "Mexico"
replace survey_year = "2015-16" if country == "Nauru"

* Generating age categories
gen byte age_cat = .
replace age_cat = 1 if inrange(age,30,49.9)
replace age_cat = 2 if inrange(age,50,59.9)
replace age_cat = 3 if inrange(age,60,69.9)

* Labeling some variables that were not labelled in the dataset
label define sex_label 0 "Male" 1 "Female", modify
label values sex sex_label

label variable age_cat "Age category"
label define age_cat_label 1 "40-50 years" 2 "50-59 years" 3 "60-69 years",  modify
label values age_cat age_cat_label
tab age_cat

label variable educat3 "Educational attainment (3 levels)"
	label define educat3_label 0 "No schooling" 1 "Primary education" 2 "Secondary or above", modify
	label values educat3 educat3_label  

label define GDPclass 1 "LIC" 2 "LMIC" 3 "UMIC", modify
tab countryGDPclass

* Decode variables to put them in the table output
decode countryGDPclass, gen(countryGDPclass_string)
tab countryGDPclass_string

decode WHOregionclass, gen(WHOregionclass_string)
tab WHOregionclass_string

* Assessing for duplicate values
duplicates tag country p_id sbp_avg bmi, generate(dup)
list country p_id sbp_avg bmi dup if dup >0
duplicates drop country p_id sbp_avg bmi, force // 4 records in Afghanistan dropped

* Dropping any records without p_id
mdesc p_id
drop if p_id == "" // (1 observation deleted)

/*******************************************************************************
IMPORTING IN NCD POLICY SCORE AND RESPONSE RATE

This code imports the NCD Policy Score updated for 2020 by Scott Tschida.

Sources:

Allen LN, Nicholson BD, Yeung BYT, Goiana-da-Silva F. Implementation of non-communicable disease policies: a geopolitical analysis of 151 countries. Lancet Glob Health. 2020;8(1):e50-e58.

WHO. Noncommunicable diseases: Progress monitor 2020. Geneva: World Health Organization; 2020.
*******************************************************************************/

frame create policy_score
frame change policy_score

/* 	This file imports a Excel document with the aggregated NCD policy scores and 
	response rate that was generated using the methodology here:
	
	Allen LN, Nicholson BD, Yeung BYT, Goiana-da-Silva F. Implementation of non-communicable
	disease policies: a geopolitical analysis of 151 countries. Lancet Glob Health.
	2020;8(1):e50-e58.
	
	This is a 2020 update using the WHO progress monitor.
	
	Similarly, Scott helped to generate new response rates for the biochemical measurements.
	
	*/

import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/NCD policy score and response rate/Surveys for Scottv2.xlsx", sheet("Sheet1") firstrow clear

rename Country country
rename ResponseRates response_rate
rename NCDPolicyScore policy_score
keep country response_rate policy_score

sort country
list country 

destring policy_score, replace
replace policy_score = policy_score/18 * 100
recast str4 response_rate, force
list country response_rate policy_score

* Saving the file as a dta
save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/NCD policy score ${date}.dta", replace

frame change default
frame drop policy_score

* Performing the merge
merge m:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/NCD policy score ${date}.dta"
drop if _merge == 2
drop _merge

* Data check
list country response_rate policy_score if country_id == 1

/*******************************************************************************
RURAL DATA CLEANING

This code cleans the rural variable in two of the underlying surveys (Guyana and
Burkina Faso). This step is necessary because the variable was not cleaned in the
native dataset.

******************************************************************************/

/* 	Guyana rural cleaning notes			

	 	Note: The Guyana data is surprising as higher levels of clin_dia and other
		outcomes in rural areas. However, it is consistent with published data so
		data so I do believe it is reliable:
		
		https://drc.bmj.com/content/8/1/e001349

		1. This code merges back the rural variable from the Guyana STEPS 2016 survey:
		
			First, I review the raw data. It turns out that the Guyana survey does
			not have a classic steps variable for rural (i.e., "strata") but it does
			assess rurality through different ways of doing strata. Specifically, if you look
			at page 15, you can see that strata 1-10 are rural and strata 11-17 are urban!	
			
			cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/"
			use "Dataset/Country files/Guyana 2016/GUYANA2016.dta"

			
			. tab STRATUM

				STRATUM |      Freq.     Percent        Cum.
			------------+-----------------------------------
					  1 |        125        4.68        4.68
					  2 |        156        5.84       10.52
					  3 |        396       14.83       25.36
					  4 |        664       24.87       50.22
					  5 |        193        7.23       57.45
					  6 |        246        9.21       66.67
					  7 |         42        1.57       68.24
					  8 |         43        1.61       69.85
					  9 |         93        3.48       73.33
					 10 |         42        1.57       74.91
					 11 |         46        1.72       76.63
					 12 |         97        3.63       80.26
					 13 |        293       10.97       91.24
					 14 |         53        1.99       93.22
					 15 |         28        1.05       94.27
					 16 |        113        4.23       98.50
					 17 |         40        1.50      100.00
			------------+-----------------------------------
				  Total |      2,670      100.00		  */
			
	/*	2. Changing back to default frame and then cloning p_id.   */
		* frame change default
			
		gen stratum_guyana = stratum if country == "Guyana"
		destring stratum_guyana, replace
		replace rural = 0 if inrange(stratum_guyana,11,17) & country == "Guyana" // 0 labels urban in HPACC
		replace rural = 1 if inrange(stratum_guyana,1,10) & country == "Guyana" // 1 labels rural in HPACC
		tab rural if country == "Guyana"
		drop stratum_guyana

		
/* 	Burkina Faso rural cleaning notes			

	*/
		clonevar p_id_burkinafaso_string = p_id if country == "Burkina Faso"
		
		sort p_id_burkinafaso_string
		distinct p_id_burkinafaso_string if !missing(p_id_burkinafaso_string), missing // confirming no duplicates
		merge m:1 p_id_burkinafaso_string using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Country files/Burkina Faso/Burkina Faso merge ready dataset"
		drop if _merge == 2  // drops the merge using if not matched
		drop _merge
						
	/*	Final changes to harmonize the merged  data   	*/
		tab rural_burkinafaso if country == "Burkina Faso", missing
						
		* Recoding these changes (HPACC rural variable: 0==urban and 1==rural)
		replace rural = 0 if inlist(rural_burkinafaso,1)
		replace rural = 1 if rural_burkinafaso == 2
		
		tab rural if country == "Burkina Faso", missing   //   Look at merged results
	
		* Drop variables
		drop rural_burkinafaso p_id_burkinafaso_string


/*******************************************************************************
LIPID DATA CLEANING

This step merges in total cholesterol data for the surveys in Uganda, Georgia,
Nepal, and Kenya. This step is necessary as the data was not clean or available 
in the HPACC dataset.

******************************************************************************/

/*	Note: Tchol in mmol/dl is required for lab scores to run. This bit of code
	investigates data availability for tchol_mgdl. */.

* Reviewing data availability
	bysort country: mdesc tchol_mgdl
	
	/* 
	Tchol is not available (i.e., 100% missing) in the following countries and must be merged:
	
	Georgia -> 
	Kenya -> Original tchol not shared from Kenya; as of 4/20/21, WHO Repository request pending
	Nepal -> 
	Uganda -> Updated with STEPS data below; need to ping Michaela on this data update
	*/
	
* Merging Uganda lipid data
	/*	Tchol and HDL for Uganda were not incldued in the dataset originally shared
	but are part of the dataset on the repository. Here I merge in tchol (though not HDL).	*/

	frame create uganda
	frame change uganda
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/WHO repository version/uga2014.dta"

	gen country = "Uganda"
	gen tchol_mgdl_uganda = b8*38.67
	
		replace tchol_mgdl_uganda = . if tchol_mgdl>300
		replace tchol_mgdl_uganda = . if tchol_mgdl<3
	
	rename pid p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_uganda	

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/Uganda lipid merge/Uganda cleaned dataset for merge.dta", replace


	frame change default
	frame drop uganda
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Uganda/Uganda lipid merge/Uganda cleaned dataset for merge.dta"
		
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_uganda if country == "Uganda"
	drop tchol_mgdl_uganda
	
	* browse hdl_mgdl tchol_mgdl if country == "Uganda"

* Merging Georgia lipid data
	/*	Tchol and HDL for Georgia were not included. Here I merge in tchol (though not HDL).	*/

	frame create georgia
	frame change georgia
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia dataset.dta", clear

	gen country = "Georgia"
	gen tchol_mgdl_georgia = b8*38.67
	
		replace tchol_mgdl_georgia = . if tchol_mgdl>300
		replace tchol_mgdl_georgia = . if tchol_mgdl<3
	
	rename qr p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_georgia

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia lipid merge", replace


	frame change default
	frame drop georgia
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Georgia/georgia lipid merge.dta"
	
	drop if _merge == 2 // 8 records with almost complete missing data dropped from raw dataset
	drop _merge
	replace tchol_mgdl = tchol_mgdl_georgia if country == "Georgia"
	drop tchol_mgdl_georgia
	
	* browse hdl_mgdl tchol_mgdl if country == "Georgia"
	
* Merging Nepal; lipid data
	/*	Tchol and HDL for Nepal were not included. Here I merge in tchol (though not HDL).	*/

	frame create nepal
	frame change nepal
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/npl2019.dta", clear

	gen country = "Nepal"
	gen tchol_mgdl_nepal = b8
	
		replace tchol_mgdl_nepal = . if tchol_mgdl>300
		replace tchol_mgdl_nepal = . if tchol_mgdl<3
	
	rename pid p_id
	tostring p_id, replace

	keep country p_id tchol_mgdl_nepal

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/nepal lipid merge.dta", replace


	frame change default
	frame drop nepal
	merge m:1 country p_id using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Nepal/Nepal 2019 STEPS/nepal lipid merge.dta"
	
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_nepal if country == "Nepal"
	drop tchol_mgdl_nepal
	
	* browse hdl_mgdl tchol_mgdl if country == "Nepal"
	
* Merging Kenya lipid data
	/*	Tchol and HDL for Kenya were not incldued in the dataset originally shared
	but are part of the dataset on the repository. Here I merge in tchol (though not HDL).	*/

	frame create kenya
	frame change kenya
	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya 2015 STEPS repository data/ken2015.dta"

	gen country = "Kenya"
	gen tchol_mgdl_kenya = b8*38.67
	
		replace tchol_mgdl_kenya = . if tchol_mgdl>300
		replace tchol_mgdl_kenya = . if tchol_mgdl<3
	
	rename (age sex m4a m4b m5a m5b m6a m6b m11 m12 b5) (age sex_string sbp1 dbp1 sbp2 dbp2 sbp3  dbp3 ht wt fbg_new_kenya)
	
	gen sex = 1 if sex_string == "Women"
	replace sex = 0 if sex_string == "Men"
	
	tostring psu, replace
	sort psu
	
	* Assessing for duplicate values 1
	duplicates tag country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3, generate(dup)
	list country country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl fbg_new_kenya if dup >0
	drop if dup >0 & tchol_mgdl == .
	duplicates drop country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3, force // 2 records dropped with tchol 
	
	keep country sex psu age sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl_kenya

	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya lipid merge/Kenya cleaned dataset for merge.dta", replace


	frame change default
	frame drop kenya
	merge m:1 country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/Country files/Kenya/Kenya lipid merge/Kenya cleaned dataset for merge.dta"
	* browse country sex psu sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 tchol_mgdl tchol_mgdl_kenya _merge if country == "Kenya"
	
	drop if _merge == 2
	drop _merge // no unmatched from using
	replace tchol_mgdl = tchol_mgdl_kenya if country == "Kenya"
	drop tchol_mgdl_kenya
	
	* browse hdl_mgdl tchol_mgdl if country == "Kenya"	
	
* Final tchol data check
bysort country: mdesc tchol_mgdl // only Kenya missing, see above for details on this

/*******************************************************************************
IMPORTING IN 2019 POPULATION FROM 40-69 YEARS OF AGE BY COUNTRY

This step imports in data on the
*******************************************************************************/

frame create population
frame change population

	/* 		
	
	This file imports a CSV document with the GBD population estimates for 2019 by country
	
	http://ghdx.healthdata.org/record/ihme-data/gbd-2019-population-estimates-1950-2019
	
	There is no data manipulation done to the CSV file downloaded from IHME prior
	to its import here.
	
	*/

import delimited "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/Population/IHME_GBD_2019_POP_2019_Y2020M10D15.CSV", clear 

rename (location_name) (country)

keep country year age_group_id sex_name val sex_name location_id

drop if sex_name != "both"

drop if location_id == 533
drop if country =="South Asia"
drop if country =="North Africa and Middle East"
drop sex_name sex_name

reshape wide val, i(country) j(age_group_id)

* Values 13-18 represent 5-year age categories of 40-69
keep country val13 val14 val15 val16 val17 val18 val22 // 22 is all ages

* Generates the row total of the relevant age subgroups
egen population_2019_40_69 = rowtotal(val13 val14 val15 val16 val17 val18)

* Generates total population in 2019
rename val22 population_2019_all_ages

keep country population_2019_40_69 population_2019_all_ages

* Renaming some countries
replace country = "Iran" if country == "Iran (Islamic Republic of)"
replace country = "Moldova" if country == "Republic of Moldova"
replace country = "St. Vincent & the Grenadines" if country == "Saint Vincent and the Grenadines"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Vietnam" if country == "Viet Nam"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/GBD 2019 population.dta", replace

frame change default
frame drop population

* Performing the merge
merge m:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/GBD 2019 population.dta"

* Checking merged data
list country Population2015 population_2019_40_69 population_2019_all_ages if country_id == 1

drop if _merge == 2
drop _merge
	
/*******************************************************************************
2007 WHO/ISH risk scores

	This is a multistep code to WHO/ISH 2007 CVD scores.
	
	Step 1: Preparing data
	Step 2: Exports data for use in R's 2007 "whoishrisk" module
	Step 3: Running the code in R -> as of 4/30/2021, this was "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/WHO ISH 2007 risk scores v9.R"
	Step 4: Re-importing and processing the scores generated in R
		
	Citations:
	
		https://rdrr.io/github/DylanRJCollins/whoishRisk/man/WHO_ISH_Risk.html
		
		Collins D, Lee J, Bobrovitz N, Koshiaris C, Ward A, Heneghan C. whoishRisk
		- an R package to calculate WHO/ISH cardiovascular risk scores for all
		epidemiological subregions of the world. F1000Res. 2016;5:2522.
	
	Required variables for whoishrisk:
		
		age 		Age 							numeric 	Continuous (years) 
		gdr 		Gender 							numeric 	Dichotomous (0=female; 1=male) 
		smk 		Smoking 						numeric 	Dichotomous (0=non-smoker; 1=smoker) 
		sbp 		Systolic blood pressure 		numeric 	Continuous (mmHg) 
		dm 			Diabetes mellitus status		numeric 	Dichotomous (0=not diabetic; 1=diabetic) 
		chl 		Total cholesterol 	numeric 	Continuous	(mmol/ L); 0=unknown cholesterol 
		subregion 	WHO epidemiological subregion 	character 	
						“AFR_D”,
						“AFR_E”,
						“AMR_A”,
						“AMR_B”,
						“AMR_D”,
						“EMR_B”,
						“EMR_D”,
						“EUR_A”,
						“EUR_B”,
						"EUR_C”,
						“SEAR_B”,
						“SEAR_D”,
						“WPR_A”,
						“WPR_B” 		

	Epidemiological subregions: 	
	
		https://www.who.int/quantifying_ehimpacts/global/ebdcountgroup/en/
			
******************************************************************************/

*	Step 1: Preparing data
	
	* Generating subregions
		gen WHOsubregionclass = ""
		
		replace WHOsubregionclass = "AFR_D" if ///
			country == "Algeria" | ///
			country == "Algeria" | ///
			country == "Angola" | ///
			country == "Benin" | ///
			country == "Burkina Faso" | ///
			country == "Cameroon" | ///
			country == "Cape Verde" | ///
			country == "Chad" | ///
			country == "Comoros" | ///
			country == "Equatorial Guinea" | ///
			country == "Gabon" | ///
			country == "Gambia" | ///
			country == "Ghana" | ///
			country == "Guinea" | ///
			country == "Guinea-Bissau" | ///
			country == "Liberia" | ///
			country == "Madagascar"  | ///
			country == "Mali" | ///
			country == "Mauritania" | ///
			country == "Mauritius" | ///
			country == "Niger" | ///
			country == "Nigeria" | ///
			country == "Sao Tome and Principe" | ///
			country == "Senegal" | ///
			country == "Seychelles" | ///
			country == "Sierra Leone" | ///
			country == "Togo" //
		
		replace WHOsubregionclass = "AFR_E" if ///
			country == "Botswana" | ///
			country == "Burundi" | ///
			country == "Central African Republic" | ///
			country == "Congo" | ///
			country == "Côte d'Ivoire" | ///
			country == "Democratic Republic of the Congo" | ///
			country == "Eritrea" | ///
			country == "Ethiopia" | /// 
			country == "Kenya" | /// 
			country == "Lesotho" | ///
			country == "Malawi" | ///
			country == "Mozambique" | ///
			country == "Namibia" | ///
			country == "Rwanda" | ///
			country == "South Africa" | /// 
			country == "Eswatini" | ///
			country == "Uganda" | ///
			country == "Tanzania" | ///
			country == "Zambia" | ///
			country == "Zimbabwe" //
					
		replace WHOsubregionclass = "AMR_A" if ///
			country == "Canada" | ///
			country == "Cuba" | ///
			country == "United States of America" ///
		
		replace WHOsubregionclass = "AMR_B" if ///
			country == "Antigua and Barbuda" | ///
			country == "Argentina" | ///
			country == "Bahamas" | ///
			country == "Barbados" | /// 
			country == "Belize" | ///
			country == "Brazil" | ///
			country == "Chile" | ///
			country == "Colombia" | /// 
			country == "Costa Rica" | ///
			country == "Dominica" | ///
			country == "Dominican Republic" | ///
			country == "El Salvador" | ///
			country == "Grenada" | ///
			country == "Guyana" | ///
			country == "Honduras" | ///
			country == "Jamaica" | ///
			country == "Mexico" | ///
			country == "Panama" | ///
			country == "Paraguay" | ///
			country == "Saint Kitts and Newis" | ///
			country == "Saint Lucia" | ///
			country == "St. Vincent & the Grenadines" | ///
			country == "Suriname" | ///
			country == "Trinidad and Tobago" | ///
			country == "Uruguay" | ///
			country == "Venezuela" //
		
		replace WHOsubregionclass = "AMR_D" if ///
			country == "Bolivia" | ///
			country == "Ecuador" | ///
			country == "Guatemala" | /// 
			country == "Haiti" | ///
			country == "Nicaragua" | /// 
			country == "Peru" //
		
		replace WHOsubregionclass = "EMR_B" if ///
			country == "Bahrain" | ///
			country == "Cyprus" | ///
			country == "Iran" | ///
			country == "Jordan" | ///
			country == "Kuwait" | ///
			country == "Lebanon" | ///
			country == "Libya" | ///
			country == "Oman" | ///
			country == "Qatar" | ///
			country == "Saudi Arabia" | ///
			country == "Syrian" | ///
			country == "Tunisia" | ///
			country == "United Arab Emirates"  ///
		
		replace WHOsubregionclass = "EMR_D" if ///
			country == "Afghanistan" | ///
			country == "Djibouti" | ///
			country == "Egypt" | ///
			country == "Iraq" | ///
			country == "Morocco" | ///
			country == "Pakistan" | ///
			country == "Somalia" | ///
			country == "Sudan" | ///
			country == "Yemen" //
		
		replace WHOsubregionclass = "EUR_A" if ///
			country == "Andorra" | ///
			country == "Austria" | ///
			country == "Belgium" | ///
			country == "Croatia" | ///
			country == "Czech Republic" | ///
			country == "Denmark" | ///
			country == "Finland" | ///
			country == "France" | ///
			country == "Germany" | ///
			country == "Greece" | ///
			country == "Iceland" | ///
			country == "Ireland" | ///
			country == "Israel" | ///
			country == "Italy" | ///
			country == "Luxembourg" | ///
			country == "Malta" | ///
			country == "Monaco" | ///
			country == "Netherlands" | ///
			country == "Norway" | ///
			country == "Portugal" | ///
			country == "San Marino" | ///
			country == "Slovenia" | ///
			country == "Spain" | ///
			country == "Sweden" | ///
			country == "Switzerland" | ///
			country == "United Kingdom" //
		
		replace WHOsubregionclass = "EUR_B" if ///
			country == "Albania" | ///
			country == "Armenia" | ///
			country == "Azerbaijan" | ///
			country == "Bosnia and Herzegovia" | ///
			country == "Bulgaria" | ///
			country == "Georgia" | ///
			country == "Kyrgyzstan" | ///
			country == "Poland" | ///
			country == "Romania" | ///
			country == "Slovakia" | ///
			country == "Tajikistan" | ///
			country == "Macedonia" | ///
			country == "Turkey" | ///
			country == "Turkmenistan" | ///
			country == "Uzbekistan" | ///
			country == "Yugoslavia" //
	
	
		replace WHOsubregionclass = "EUR_C" if ///
			country == "Belarus" | ///
			country == "Estonia" | ///
			country == "Hungary" | ///
			country == "Kazakhstan" | ///
			country == "Latvia" | ///
			country == "Lithuania" | ///
			country == "Moldova" | ///
			country == "Russian Federation" | ///
			country == "Ukraine" //
		
		replace WHOsubregionclass = "SEAR_B" if ///
			country == "Indonesia" | ///
			country == "Sri Lanka" | ///
			country == "Thailand" //
		
		replace WHOsubregionclass = "SEAR_D" if ///
			country == "Bangladesh" | /// 
			country == "Bhutan" | ///
			country == "Democratic People's Republic of Korea" | ///
			country == "India" | ///
			country == "Maldives" | ///
			country == "Myanmar" | ///
			country == "Nepal" | ///
			country == "Timor Leste" //
		
		replace WHOsubregionclass = "WPR_A" if ///
			country == "Australia" | ///
			country == "Brunei Darussalam" | ///
			country == "Japan" | ///
			country == "New Zealand" | ///
			country == "Singapore" //
		
		replace WHOsubregionclass = "WPR_B" if /// 		
			country == "Cambodia" | ///
			country == "China" | ///
			country == "Cook Islands" | ///
			country == "Fiji" | ///
			country == "Kiribati" | ///
			country == "Laos" | ///
			country == "Malaysia" | ///
			country == "Marshall Islands" | ///
			country == "Micronesia" | ///
			country == "Mongolia" | ///
			country == "Nauru" | ///
			country == "Niue" | ///
			country == "Palau" | ///
			country == "Papua New Guinea" | ///
			country == "Philippines" | ///
			country == "Republic of Korea" | ///
			country == "Samoa" | ///
			country == "Solomon Islands" | ///
			country == "Tokelau" | ///
			country == "Tonga" | ///
			country == "Tuvalu" | ///
			country == "Vanuatu" | ///
			country == "Vietnam" //

*	Step 2: Exports data for use in R's 2007 "whoishrisk" module
				
	preserve	
		
	* Generating tchol_mmoll, which is in the units needed for whocvdrisk
		gen tchol_mmoll=tchol_mgdl/38.67
		recast double tchol_mmoll 
		label variable tchol_mmoll "Total cholesterol (mmol/l)"
		replace tchol_mmoll = . if tchol_mgdl== .  // adjustment of mmoll values	
		
	* Other data prepping for WHO_ISH_Risk
		drop if age <=18 | age >=100
		drop if sbp_avg <90 | sbp_avg >250
		drop if tchol_mmoll >10
		
		drop if age == .
		drop if sex == .
		drop if csmoke == .
		drop if sbp_avg == .
		drop if clin_dia == .
		drop if tchol_mmoll == .
						
	keep country p_id sbp_avg bmi age sex csmoke sbp_avg clin_dia tchol_mmoll WHOsubregionclass	
	
	save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/cleaned statin data for r who ish risk scores FULL.dta", replace
	
	foreach r in AFR_D AFR_E AMR_B AMR_D EMR_B EMR_D EUR_B EUR_C SEAR_B SEAR_D WPR_B {
		use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/cleaned statin data for r who ish risk scores FULL.dta" ///
			if WHOsubregionclass == "`r'", replace
		save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/`r'.dta", replace
				
	}

	restore
	
* Step 3: Running the code in R -> as of 4/30/2021, this was "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/WHO ISH 2007 risk scores v9.R"
	
* Step 4: Re-importing and processing the scores generated in R	
	
	* Reimporting and merging data after generating the scores using whoishrisk in R
	gen who2007cvdrisk_string = ""

	foreach r in AFR_D AFR_E AMR_B AMR_D EMR_B EMR_D EUR_B EUR_C SEAR_B SEAR_D WPR_B {
		merge m:1 country p_id sbp_avg bmi using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/R WHO ISH scores/`r' - R risk scores.dta"
		replace who2007cvdrisk_string = who2007cvdrisk_`r' if _merge == 3
		drop who2007cvdrisk_`r'
		drop _merge
	}
	
	tab country who2007cvdrisk_string
	
	* Manually encode who2007cvdrisk
	gen who2007cvdrisk = .
	replace who2007cvdrisk = 1 if who2007cvdrisk_string == "<10%"
	replace who2007cvdrisk = 2 if who2007cvdrisk_string == "10% to <20%"
	replace who2007cvdrisk = 3 if who2007cvdrisk_string == "20% to <30%"
	replace who2007cvdrisk = 4 if who2007cvdrisk_string == "30% to <40%"
	replace who2007cvdrisk = 5 if who2007cvdrisk_string == ">=40%"
	
	* Labeling
	label variable who2007cvdrisk "2007 WHO/ISH risk category"
	label define who2007cvdrisk_label 1 "<10%" 2 "10% to <20%" 3 "20% to <30%" 4 "30% to <40%" 5 ">=40%",  modify
	label values who2007cvdrisk who2007cvdrisk_label
	
	* Checking out the data
	tab country who2007cvdrisk if inrange(age,40,69), row
	tab country who2007cvdrisk if inrange(age,40,69), row m
	

/**********************************************************************
Calculating 2019 WHO CVD risk

See "help whocvdrisk" for more. Required variables include:

    ccode     - 3 letter country code identifier (ISO 3166-1 alpha-3)
    sex       - Sex (1 = Male, 2 = Female)
    ages      - Age (years)
    smallbin  - Smoking status (0 = Other, 1 = Current)
    hxdiabbin - History of diabetes (0 = No, 1 = Yes)
    sbp       - Systolic blood pressure (mm Hg)
    tchol     - Total cholesterol (mmol/L)
    bmi       - Body mass index (kg/m^2)

Citation: Kaptoge S, Pennells L, De Bacquer D, Cooney MT, Kavousi M, Stevens G,
et al. World Health Organization cardiovascular disease risk charts: revised
models to estimate risk in 21 global regions. The Lancet Global Health. 2019.

Note: whocvdrisk uses "country," "year," and other variables. It will give a 
weird error if those variables are coded in the correct way.

**********************************************************************/

//	Using Kountry codes
*	Note that whocvdrisk rquires country codes in ISO 3166 alpha-3 (iso3c) format

kountry country, from(other) stuck
rename _ISO3N_ iso3n
kountry iso3n, from(iso3n) to(iso3c)
rename _ISO3C_ ccode
drop iso3n

*	Updating country codes
replace ccode = "TZA" if country == "Zanzibar"
replace ccode = "SWZ" if country == "Eswatini"
replace ccode = "TK" if country == "Tokelau"

*	Checking data 
list country ccode if country_id == 1

//	Generating 10-year WHO CVD risk using 2019 equations
clonevar ages = age
recode sex (0=1)(1=2)

// These countries don't run so we default them temporarily to Solomon Island's country code
replace ccode = "SLB" if country == "Tuvalu"
replace ccode = "SLB" if country == "Nauru"
replace ccode = "SLB" if country == "Tokelau"

//	Dummy variables needed to get whocvdrisk to run
gen hxdiabbin = clin_dia
gen tchol = tchol_mgdl/38.67 // requires in mmoll
clonevar smallbin = csmoke
rename year year_renamed
rename country country_renamed
rename sbp_avg sbp

//	Assessing codebook of required WHO variables
* codebook ccode sex ages smallbin hxdiabbin tchol sbp bmi

whocvdrisk	
rename cal2_who_cvdx_m1 who_10yr_lab
rename cal2_who_cvdx_m2 who_10yr_nonlab

//	Recoding sex back to original HPACC code
recode sex (1=0)(2=1)
rename year_renamed year
rename country_renamed country
rename sbp sbp_avg 
replace ccode = "TUV" if country == "Tuvalu"
replace ccode = "NRU" if country == "Nauru"
replace ccode = "TK" if country == "Tokelau"

*****

gen risk_above_20_nonlab =.
replace risk_above_20_nonlab = 1 if who_10yr_nonlab > 0.2 & who_10yr_nonlab <1
replace risk_above_20_nonlab = 0 if who_10yr_nonlab <= 0.2
replace risk_above_20_nonlab = . if missing(who_10yr_nonlab)

gen risk_above_20_lab =.
replace risk_above_20_lab = 1 if who_10yr_lab > 0.2 & who_10yr_lab <1
replace risk_above_20_lab = 0 if who_10yr_lab <= 0.2
replace risk_above_20_lab = . if missing(who_10yr_lab)

tab country risk_above_20_nonlab if inrange(age,40,69), missing
tab country risk_above_20_lab if inrange(age,40,69), missing

////////////////////////////////////////////////////////////////////////////////
/////////////////////////// MAIN OUTCOME DEFINITIONS ///////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
GENERATING THE SAMPLE OF PARTICIPANTS
******************************************************************************/		

* Total sample for use in survey characteristics table
gen byte sample_all = 1 if inrange(age,40,69) & pregnant != 1
tab country sample_all
tab country sample_all, m

* Primary prevention denominator
gen byte samp_pri_lab_statin = 1 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & (risk_above_20_lab == 1 | (hbg_new == 1 & clin_dia == 1))
replace samp_pri_lab_statin = 0 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & (risk_above_20_lab == 0 & (hbg_new == 0 | clin_dia == 0))
tab country samp_pri_lab_statin
tab country samp_pri_lab_statin, m

* Secondary prevention denominator
gen byte samp_sec_statin = 1 if inrange(age,40,69) & pregnant != 1 & mi == 1 & statin != .
replace samp_sec_statin = 0 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != .
tab country samp_sec_statin
tab country samp_sec_statin, m

* Missingness among all sample
tab country mi if sample_all == 1, missing row
tab country statin if sample_all == 1, missing row

/*******************************************************************************
GENERATING THE OUTCOMES
******************************************************************************/		

* Primary CVD prevention
gen byte statin_pri_lab = .
replace statin_pri_lab = 0 if samp_pri_lab_statin == 1 & statin == 0
replace statin_pri_lab = 1 if samp_pri_lab_statin == 1 & statin == 1
tab statin_pri_lab

* Secondary CVD prevention
gen byte statin_sec = .
replace statin_sec = 0 if samp_sec_statin == 1 & statin == 0
replace statin_sec = 1 if samp_sec_statin == 1 & statin == 1
tab statin_sec

* Among people using meds
gen statin_den_who = .
replace statin_den_who = 1 if mi == 1 & statin == 1 & inrange(age,18,69) & pregnant != 1
replace statin_den_who = 2 if mi == 0 & statin == 1 & inrange(who_10yr_lab,.20,1) & inrange(age,18,69) & pregnant != 1
replace statin_den_who = 3 if mi == 0 & statin == 1 & inrange(who_10yr_lab,.10,.199999999) & inrange(age,18,69) & pregnant != 1
replace statin_den_who = 4 if mi == 0 & statin == 1 & inrange(who_10yr_lab,0,.099999999) & inrange(age,18,69) & pregnant != 1
replace statin_den_who = . if country == "Iraq" // Take care here -> Iraq CVD med question was conditional on reporting an MI/stroke
tab country statin_den_who

////////////////////////////////////////////////////////////////////////////////
///////////////////// MAIN ANALYSIS USING POPULATION WEIGHTS ///////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
This analysis uses population weighting by GBD estimates of the population of
individuals ages 40-69 years of age in each country.

A sensitivity analysis using equal countries weights is performed later.
******************************************************************************/
	
* Estimating proportions  ------------------------------------------------------

	* Primary prevention outcome using fbg weights
	
		foreach var of varlist samp_pri_lab_statin statin_pri_lab {

			* Sample weights
			gen sample = 1 if `var' != .

			bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			* gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
			
			* Estimates overall output
			svy, subpop(sample): proportion `var'
			estimates store wpop_ov_`var'
			matrix wpop_ov_`var' = r(table)
			
			* Estimates by country
			svy, subpop(sample): proportion `var', over(country_encoded)
			estimates store wpop_c_`var'
			matrix wpop_c_`var' = r(table)
			
			* Estimates by country group
			svy, subpop(sample): proportion `var', over(WHOregionclass)
			estimates store wpop_r_`var'
			matrix wpop_r_`var' = r(table)
			
			svy, subpop(sample): proportion `var', over(countryGDPclass)
			estimates store wpop_i_`var'
			matrix wpop_i_`var' = r(table)
			
			* Drop variables for loop
			drop sample wpopnr_sample sum_wpop_sample
		}
	
	* Secondary prevention outcome using all weights
		
		foreach var of varlist samp_sec_statin statin_sec {

			* Sample weights
			gen sample = 1 if `var' != .

			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
			
			* Estimates overall output
			svy, subpop(sample): proportion `var'
			estimates store wpop_ov_`var'
			matrix wpop_ov_`var' = r(table)
			
			* Estimates by country
			svy, subpop(sample): proportion `var', over(country_encoded)
			estimates store wpop_c_`var'
			matrix wpop_c_`var' = r(table)
			
			* Estimates by country group
			svy, subpop(sample): proportion `var', over(WHOregionclass)
			estimates store wpop_r_`var'
			matrix wpop_r_`var' = r(table)
			
			svy, subpop(sample): proportion `var', over(countryGDPclass)
			estimates store wpop_i_`var'
			matrix wpop_i_`var' = r(table)
			
			* Drop variables for loop
			drop sample wpopnr_sample sum_wpop_sample
		}
	
* Multivariable Poisson regression models --------------------------------------

	* Primary prevention outcome using fbg weights

		foreach var of varlist statin_pri_lab {

			* Sample weights
			gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

			bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			* gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
			
			* Regression
			svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
			estimates store wpop_pois_`var'
			matrix wpop_pois_`var' = r(table)
				
			* Marginal effects
			margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
			estimates store wpop_margins_`var'
			matrix wpop_margins_`var' = r(table)	
			
			* Drop variables for loop
			drop sample wpopnr_sample sum_wpop_sample
		}

	* Secondary prevention outcome using all weights
	
		foreach var of varlist statin_sec {

			* Sample weights
			gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
			
			* Regression
			svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
			estimates store wpop_pois_`var'
			matrix wpop_pois_`var' = r(table)
				
			* Marginal effects
			margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
			estimates store wpop_margins_`var'
			matrix wpop_margins_`var' = r(table)	
			
			* Drop variables for loop
			drop sample wpopnr_sample sum_wpop_sample
		}

////////////////////////////////////////////////////////////////////////////////
////////////////////// TABLE 1: SURVEY CHARACTERISTICS /////////////////////////
////////////////////////////////////////////////////////////////////////////////	

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
putexcel set "Putexcel tables/Table survey characteristics ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "ISO code"
putexcel C1 = "Income group"
putexcel D1 = "Year"
putexcel E1 = "Response rate"
putexcel F1 = "Sample size"
putexcel G1 = "Proportion female, %"
putexcel H1 = "Median age (IQR), years"
putexcel I1 = "2015 population, thousands"
putexcel J1 = "Region"

* Macro for bottom row
scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
sort country_id country 

* Column: Country --------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel A`row' = country[`n']
}

* Column: ISO code -------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel B`row' = ccode[`n']
}

* Column: Income group ---------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel C`row' = countryGDPclass_string[`n']
}

* Column: Year -----------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel D`row' = survey_year[`n']
}

* Column: Response rate --------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel E`row' = response_rate[`n']
}

* Column: Sample size ----------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
count if inlist(sample_all,1) & country_encoded == `n'
local sample = r(N)
putexcel F`row' = `sample'
}
putexcel F2:F`=scalar(bottom_row)',nformat(number_sep)

* Column: Female ---------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	tab sex if country_encoded == `n' & sample_all == 1, matcell(mat_female)
	matlist mat_female
	local loc_female = mat_female[2,1]/(mat_female[1,1]+mat_female[2,1])*100
	dis `loc_female'
	putexcel G`row' = `loc_female'
}
putexcel G2:G`=scalar(bottom_row)',nformat(number)

* Column: Median age (IQR) -----------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sum age if sample_all == 1 & country_encoded == `n', detail
	
	local min1 = r(p25)
	di `min1'
	local min2 = string(`min1',"%9.0f")
	di `min2'
	
	local med1 = r(p50)
	di `med1'
	local med2 = string(`med1',"%9.0f")
	di `med2'
	
	local max1 = r(p75)
	di `max1'
	local max2 = string(`max1',"%9.0f")
	di `max2'
	
	putexcel H`row' = "`med2' (`min2'-`max2')"
}

* Column: Population -----------------------------------------------------------
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel I`row' = Population2015[`n']
	}

* Column: Region ---------------------------------------------------------------
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel J`row' = WHOregionclass_string[`n']
	}

* Summary row at bottom --------------------------------------------------------
	
* Overall total label
	putexcel A`=scalar(bottom_row)' = "Overall total"

* Sample all
	tab sample_all, matcell(sample)
	matlist sample
	local sample = sample[1,1]
	dis `sample'
	putexcel F`=scalar(bottom_row)' = `sample',nformat(number_sep)
	
* Median age
	bysort country sample_all: egen median_age = median(age) if sample_all == 1	
 
////////////////////////////////////////////////////////////////////////////////
/////// MAIN FOREST PLOTS USING WPOP: FIGURE 1 AND REGRESSION APPENDICES ///////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
SEPARATE WPOP FORESTPLOT BY REGION, INCOME GROUP, AND OVERALL
*******************************************************************************/	

foreach var in sec pri_lab {
	
* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop forestplot_regiongdp `var' ${date}.xlsx", replace 

putexcel A1 = "category"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"

putexcel A2 = "Region"
putexcel A3 = "Africa"
putexcel A4 = "Americas"
putexcel A5 = "South East Asia"
putexcel A6 = "Western Pacific"
putexcel A7 = "Europe"
putexcel A8 = "Eastern Mediterranean"

putexcel A10 = "Income group"
putexcel A11 = "Low income"
putexcel A12 = "Lower middle"
putexcel A13 = "Upper middle"

putexcel A15 = "Overall"

// Region
matlist wpop_r_statin_`var'
matrix results = wpop_r_statin_`var'

	* est
	putexcel B3 = results[1,7]
	putexcel B4 = results[1,8]
	putexcel B5 = results[1,9]
	putexcel B6 = results[1,10]
	putexcel B7 = results[1,11]
	putexcel B8 = results[1,12]
	putexcel B9 = results[1,13]

	* lci
	putexcel C3 = results[5,7]
	putexcel C4 = results[5,8]
	putexcel C5 = results[5,9]
	putexcel C6 = results[5,10]
	putexcel C7 = results[5,11]
	putexcel C8 = results[5,12]
	putexcel C9 = results[5,13]

	* uci
	putexcel D3 = results[6,7]
	putexcel D4 = results[6,8]
	putexcel D5 = results[6,9]
	putexcel D6 = results[6,10]
	putexcel D7 = results[6,11]
	putexcel D8 = results[6,12]
	putexcel D9 = results[6,13]

// GDP categories
matlist wpop_i_statin_`var'
matrix results = wpop_i_statin_`var'

	* est
	putexcel B11 = results[1,4]
	putexcel B12 = results[1,5]
	putexcel B13 = results[1,6]
	putexcel B14 = results[1,7]

	* lci
	putexcel C11 = results[5,4]
	putexcel C12 = results[5,5]
	putexcel C13 = results[5,6]
	putexcel C14 = results[5,7]

	* uci
	putexcel D11 = results[6,4]
	putexcel D12 = results[6,5]
	putexcel D13 = results[6,6]
	putexcel D14 = results[6,7]
	
// Overall
matlist wpop_ov_statin_`var'
matrix results = wpop_ov_statin_`var'

	* est
	putexcel B15 = results[1,2]
	
	* lci
	putexcel C15 = results[5,2]
	
	* uci
	putexcel D15 = results[6,2]
	
}
				
/*******************************************************************************
COMBINED FOREST PLOT FOR PRIMARY AND SECONDARY PREVENTION
*******************************************************************************/

frame create forestplotcomb
frame change forestplotcomb

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
import excel "Putexcel tables/wpop forestplot_regiongdp sec ${date}.xlsx", sheet("Sheet1") cellrange(A1:D15) firstrow clear
gen id = _n
save "Temporary datasets/sec comb ${date}.dta", replace

import excel "Putexcel tables/wpop forestplot_regiongdp pri_lab ${date}.xlsx", sheet("Sheet1") cellrange(A1:D15) firstrow clear
gen id = _n
append using "Temporary datasets/sec comb ${date}.dta", nolabel gen(sec)

* Sorting
sort id sec

* Dropping and replacing duplicate headers
drop if inlist(category,"","Region","Income group") & sec == 1
replace category = "" if sec == 1

* Converting to %
replace est = est*100
replace lci = lci*100
replace uci = uci*100


* See help for what each of the "_USE" categories refers to	

	/*
	    The values of _USE or use are interpreted by forestplot in the following way:

        0 = subgroup labels (headings); general text
        1 = non-missing study estimates
        2 = missing study estimates
        3 = subgroup pooled effects
        4 = heterogeneity information; assumed to be present only in the first left-hand column
        5 = overall pooled effect
        6 = blank line
	*/

gen _USE = 6
replace _USE = 1 if !missing(est)
replace _USE = 0 if inlist(category,"Income group","Region")

* Making string variables	
gen est_s = string(est,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "0 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realigning
leftalign


* Adding bolded labels		
label var category ""
replace category = `"{bf:"' + category + `"}"' if _USE==0
replace category = `"{bf:Overall}"' if category == "Overall"

label var output `"`"{bf:Statin use (%)}"'"'

* Re-ordering to get overall at the top
gen id2 = _n + 3
replace id2 = 3 if id2 == 25
replace id2 = 1 if id2 == 26
replace id2 = 2 if id2 == 27
gsort + id2

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of very helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots

	This bit of code helps with the plotid colors
	
	Set1 hexadecimal
		red: "#E41A1C"
		blue: "#377EB8"
		
	*/
	
* recast str21 category, force

forestplot est lci uci, ///
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output) /// this tells forestplot which columsn to display
	range(0 65) /// range of the plot
	xlabel(0 20 40 60) /// plot labels
	ylabel(,grid) ///
	astext(50) ///
	xsize(4) //// % text in the plot
	spacing(1.5) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	diamopts(color(black)) ///
	olineopts(lcolor(none)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	name(wpop_forest_regdp_statin_comb,replace) ///
	plotid(sec) ///
		box1opts(mcolor(none)) ci1opts(lcolor(cranberry)) point1opts(mcolor(cranberry)) diam1opts(color(cranberry)) ///
		box2opts(mcolor(none)) ci2opts(lcolor(navy)) point2opts(mcolor(navy)) diam2opts(color(navy)) ///
	boxscale(60)  ///
	null(0) ///
	xtitle("Statin use (%)", size(2.5rs)  margin(l=0 r=0 b=2rs t=2rs)) //
	
* Fixing some minor formatting issues
gr_edit .plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy

* Making the line
gr_edit .plotregion1.AddLine added_lines editor 50.57696737239915 .0341909628416472 50.57696737239915 24.7 /// 14.75215405672569
gr_edit .plotregion1.added_lines_new = 1
gr_edit .plotregion1.added_lines_rec = 1
gr_edit .plotregion1.added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.1) allow_pct(1)) color(black) pattern(dash) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy

* Adding text
gr_edit .plotregion1.AddTextBox added_text editor 25.7 42
gr_edit .plotregion1.added_text_new = 1
gr_edit .plotregion1.added_text_rec = 1
gr_edit .plotregion1.added_text[1].style.editstyle  angle(default) size( sztype(relative) val(3.4722) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(gr_edit relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( gr_edit sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit .plotregion1.added_text[1].style.editstyle size(2.) editcopy
gr_edit .plotregion1.added_text[1].text = {}
gr_edit .plotregion1.added_text[1].text.Arrpush {it:WHO target}

* Changing colors to Set2 in R

	/* 	red "228 26 28" // E41A1C
		blue "55 126 184" //377EB8
	*/
	
	* Blue
gr_edit .plotregion1.plot7.style.editstyle marker(fillcolor("55 126 184")) editcopy
gr_edit .plotregion1.plot7.style.editstyle marker(linestyle(color("55 126 184"))) editcopy
gr_edit .plotregion1.plot4.style.editstyle area(linestyle(color("55 126 184"))) editcopy

	* Red
gr_edit .plotregion1.plot6.style.editstyle marker(fillcolor("228 26 28")) editcopy
gr_edit .plotregion1.plot6.style.editstyle marker(linestyle(color("228 26 28"))) editcopy
gr_edit .plotregion1.plot3.style.editstyle area(linestyle(color("228 26 28"))) editcopy

	graph save "Figures/wpop_forest_regiongdp statin comb ${date}.gph", replace         
	graph export "Figures/wpop_forest_regiongdp statin comb ${date}.pdf", as(pdf) name(wpop_forest_regdp_statin_comb) replace

frame change default
frame drop forestplotcomb

/*******************************************************************************
WPOP POOLED MAIN REGRESSIONS FOREST PLOTS
*******************************************************************************/

foreach var in sec pri_lab {

* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", replace 

estimates restore wpop_pois_statin_`var' //  Restoring estimates	
estimates replay wpop_pois_statin_`var', baselevels // irr

putexcel (A1) = etable	// exporting the whole regression table

estimates restore wpop_margins_statin_`var' //  Restoring estimates	
estimates replay wpop_margins_statin_`var', baselevels

putexcel (H1) = etable	// exporting the whole regression table

frame create wpop_main_forestplot
frame change wpop_main_forestplot	
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:N19) firstrow clear
	
* Renaming/dropping variables
	
	* Risk ratios
	drop StdErr
	drop t // t value
	rename Coef est_rr // IRR
	rename statin_`var' category
	rename F lci_rr
	rename G uci_rr
	rename Pt pvalue_rr

	* Margins
	rename dydx est_dydx
	drop H // repeat titles
	drop J // standard errors
	drop K // t value
	drop L // margins p-value (use from RR instead)
	rename M lci_dydx
	rename N uci_dydx
		
* Converting to % for margins
replace est_dydx = est_dydx*100
replace lci_dydx = lci_dydx*100
replace uci_dydx = uci_dydx*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Exponentiating
gen est_rr_exp = exp(est_rr)
gen lci_rr_exp = exp(lci_rr)
gen uci_rr_exp = exp(uci_rr)

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue_rr <0.0001
replace pvalue_s = "" if pvalue_rr == .

gen est_rr_s = string(est_rr_exp,"%9.2f")
gen lci_rr_s = string(lci_rr_exp,"%9.2f")
gen uci_rr_s = string(uci_rr_exp,"%9.2f")

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")

* Generating columns
gen column_rr = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace column_rr = "1 (ref)" if est_rr == 0 | est_rr == .
replace column_rr = "" if est_rr == .

gen column_dydx = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace column_dydx = "0 (ref)" if est_dydx == 0 | est_dydx == .
replace column_dydx = "" if est_dydx == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var column_rr `"`"{bf:Risk ratio               }"' `"{bf:in statin use}"'"'
label var column_dydx `"`"{bf:Absolute difference}"' `"{bf:in statin use (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est_rr lci_rr uci_rr, ///
	eform /// exponentiate
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(column_rr column_dydx pvalue_s) /// this tells forestplot which columsn to display
	range(0.25 4) /// range of the plot
		xlabel(0.25 0.5 1 2 4) /// plot labels
	astext(80) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.5) ///
	xtitle("Risk ratio", size(3.2rs)  margin(l=0 r=58rs b=2rs t=2rs)) ///
	name(wpop_main_forest_`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/wpop_main_forest_`var'_ ${date}.gph", replace         
	graph export "Figures/wpop_main_forest_`var'_ ${date}.pdf", as(pdf) name(wpop_main_forest_`var') replace
		
frame change default
frame drop wpop_main_forestplot
	
}
	
////////////////////////////////////////////////////////////////////////////////
//////////////////////////// FIGURE 2 PLOTS /////////// ////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
A. IMPORTING IN HEALTH SPENDING DATA
*******************************************************************************/

* Change frames
frame create health_spend
frame change health_spend
clear

* "GNI per capita, PPP (constant 2017 international $)"
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
import excel "External country data/Health expenditures/API_SH.XPD.CHEX.PP.CD_DS2_en_excel_v2_2168703", sheet("Data") cellrange(A4:BL268) firstrow
save "External country data/Health expenditures/WB Health Expenditures.dta", replace

* Renaming a few variables
rename CountryName country
rename CountryCode country_code

* Labeling columns
foreach v of varlist BA BB BC BD BE BF BG BH BI BJ BK BL {
	local x : variable label `v'
	rename `v' health_spend`x'
	}

* Renaming some countries
replace country = "Iran" if country == "Iran, Islamic Rep."
replace country = "Kyrgyzstan" if country == "Kyrgyz Republic"
replace country = "St. Vincent & the Grenadines" if country == "St. Vincent and the Grenadines"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Laos" if country == "Lao PDR"

* Reshape review: https://stats.idre.ucla.edu/stata/modules/reshaping-data-wide-to-long/
reshape long health_spend, i(country) j(year)

egen country_year = group(country year), label
decode country_year, gen(country_year_string)
keep country_year_string health_spend   // Removing unnecessary variables prior to merge

* Performing the merge
save "External country data/Health expenditures/WB Health Expenditures.dta", replace
frame change default
egen country_year = group(country year), label
decode country_year, gen(country_year_string)
merge m:1 country_year_string using "External country data/Health expenditures/WB Health Expenditures.dta"
drop if _merge == 2  // Drops the using only detritus
drop _merge

* Take a look at the data
list country ccode health_spend if country_id == 1

* The health spend per capita for a few countries added added manually
replace health_spend = 737.7508545 if country == "Jordan"  	// 2018 last available in Jordan
replace health_spend = 519.2666016 if country == "Mongolia"  // 2018 last available in Mongolia
replace health_spend = 180.4055023 if country == "Nepal" 	// 2018 last available in Nepal
replace health_spend = 442.6 if country == "Tokelau" 	// https://apps.who.int/iris/rest/bitstreams/1096380/retrieve
	
* Update country codes
replace ccode = "TK" if country == "Tokelau"
 
* Visualize the data
list country ccode health_spend health_spend health_spend if country_id == 1
drop country_year country_year_string
label drop country_year
frame drop health_spend

/*******************************************************************************
B. IMPORTING IN GDP/GNI DATA
*******************************************************************************/

* Change frames
frame create gni
frame change gni
clear

* Importing data downloaded on 10-Jun-2020 via: https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.KD
* "GNI per capita, PPP (constant 2017 international $)"
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
import excel "External country data/GDP file/API_NY.GNP.PCAP.PP.KD_DS2_en_excel_v2_2167454", sheet("Data") cellrange(A4:BL268) firstrow
save "External country data/GDP file/WB gni.dta", replace

* Renaming a few variables
rename CountryName country
rename CountryCode country_code

* Labeling columns
foreach v of varlist BA BB BC BD BE BF BG BH BI BJ BK BL {
	local x : variable label `v'
	rename `v' gni`x'
	}

* Renaming some countries
replace country = "Iran" if country == "Iran, Islamic Rep."
replace country = "Kyrgyzstan" if country == "Kyrgyz Republic"
replace country = "St. Vincent & the Grenadines" if country == "St. Vincent and the Grenadines"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Laos" if country == "Lao PDR"

* Reshape review: https://stats.idre.ucla.edu/stata/modules/reshaping-data-wide-to-long/
reshape long gni, i(country) j(year)

egen country_year = group(country year), label
decode country_year, gen(country_year_string)
keep country_year_string gni   // Removing unnecessary variables prior to merge

* Performing the merge
save "External country data/GDP file/WB gni.dta", replace
frame change default
egen country_year = group(country year), label
decode country_year, gen(country_year_string)
merge m:1 country_year_string using "External country data/GDP file/WB gni.dta"
drop if _merge == 2  // Drops the using only detritus
drop _merge

* Take a look at the data
list country year gni if country_id == 1

* The GNI per capita for a few countries added added manually
replace gni = 2229.657978 if country == "Afghanistan"  	// 2017 Afghanistan only year available
replace gni = 11976.28186 if country == "Guyana"  	// 2017 year first year available
replace gni = 10890.96619 if country == "Iraq" 	// 2017 first year available
replace gni = 14932.2458 if country == "Nauru" 	// 2017 first year available
replace gni = 2612.622697 if country == "Solomon Islands"  	// 2017 first available
replace gni = 12158.00624 if country == "St. Vincent & the Grenadines" 	// 2017 first available
replace gni = 3614.103314 if country == "Tajikistan" 	// 2017 first available
replace gni = 13615.49103 if country == "Turkmenistan"  	// 2017 GDP
replace gni = 5575.167426 if country == "Tuvalu" 	// 2017 GDP

replace gni = 6275 if country == "Tokelau" 	/* 2015-16 GDP:
	
	https://www.tokelau.org.nz/Bulletin/April+2017/GDP+first.html 	*/

 
* Visualize the data
list country ccode year gni if country_id == 1
drop country_year country_year_string
label drop country_year
frame drop gni

/*******************************************************************************
C. IMPORTING IN DALYS
*******************************************************************************/

frame create dalys
frame change dalys

/* 	This file imports a CSV showing from GBD showing DALYs by country for ischemic stroke
	and ischemic heart disease. It then adds these up and prepares the data so that
	it can be merged by country.	*/

import delimited "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/DALYs/IHME-GBD_2019_DATA-39403fac-1.csv", clear

rename location_name country

* Preparing for reshape and reshaping
gen byte j = .
replace j = 1 if cause_name == "Ischemic heart disease"
replace j = 2 if cause_name == "Ischemic stroke"

keep country val j

reshape wide val, i(country) j(j)

* Summing ischemic heart disease + ischemic stroke
gen dalys = val1 + val2
sum dalys, d

drop val1 val2

* Tinkering with country names to facilitate the merge
replace country = "Iran" if country == "Iran (Islamic Republic of)"
replace country = "Moldova" if country == "Republic of Moldova"
replace country = "St. Vincent & the Grenadines" if country == "Saint Vincent and the Grenadines"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Vietnam" if country == "Viet Nam"

* Saving the file as a dta
save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/DALYs/dalys 2021_04_21.dta", replace

frame change default
frame drop dalys

* Performing the merge
merge m:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/External country data/DALYs/dalys 2021_04_21.dta"
drop if _merge == 2
drop _merge

* Data check
list country dalys if country_id == 1

/*******************************************************************************
EXPORTING THE DATA TO EXCEL FOR R REPEL SCATTERPLOTS
*******************************************************************************/	
	
/*	This data will be generated for use in ggplot2 in R, which permits repel-style
	scatterplot labeling. */	
	
* Generating the excel doc -----------------------------------------------------
			
	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
	putexcel set "Putexcel tables/country plot data ${date}.xlsx", replace 
		
	* Column: Country
	putexcel A1 = "country"
	
	sort country_encoded
	distinct country_encoded
	scalar n_countries = r(ndistinct)
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = `"`:label (country_encoded) `n''"'
	}	
	
	* Column: Country code
	putexcel B1 = "ccode"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1
	sort country_id country_encoded
	putexcel B`row' = ccode[`n']
	}	
	
	* Column: Region
	putexcel C1 = "WHOregionclass_string"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel C`row' = WHOregionclass_string[`n']
	}	
	
	* Column: Income group
	putexcel D1 = "countryGDPclass_string"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel D`row' = countryGDPclass_string[`n']
	}	

	* Column: Statin use estimates primary prevention
	putexcel E1 = "est_pri_lab"
	putexcel F1 = "lci_pri_lab"
	putexcel G1 = "uci_pri_lab"
		
		local row = 2
		di `row'
		
		forval n = 1/`=scalar(n_countries)' {
				
			matlist wpop_c_statin_pri_lab
			matrix results = wpop_c_statin_pri_lab
						
			local b = results[1,"1.statin_pri_lab@`n'.country_encoded"]*100
			local lci = results[5,"1.statin_pri_lab@`n'.country_encoded"]*100
			local uci = results[6,"1.statin_pri_lab@`n'.country_encoded"]*100
			
			putexcel E`row' = `b'
			putexcel F`row' = `lci'
			putexcel G`row' = `uci'
			
			local row = `row'+1
			di `row'
		}
	
	* Column: Statin use estimates primary prevention
	putexcel H1 = "est_sec"
	putexcel I1 = "lci_sec"
	putexcel J1 = "uci_sec"
		
		local row = 2
		di `row'
		
		forval n = 1/`=scalar(n_countries)' {
				
			matlist wpop_c_statin_sec
			matrix results = wpop_c_statin_sec
						
			local b = results[1,"1.statin_sec@`n'.country_encoded"]*100
			local lci = results[5,"1.statin_sec@`n'.country_encoded"]*100
			local uci = results[6,"1.statin_sec@`n'.country_encoded"]*100
			
			putexcel H`row' = `b'
			putexcel I`row' = `lci'
			putexcel J`row' = `uci'
			
			local row = `row'+1
			di `row'
		}
		
	* Column: health_spend
	putexcel K1 = "health_spend"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel K`row' = health_spend[`n']
	}
	
	* Column: gni
	putexcel L1 = "gni"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel L`row' = gni[`n']
	}
	
	* Column: policy_score
	putexcel M1 = "policy_score"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel M`row' = policy_score[`n']
	}
	
	* Column: DALYS
	putexcel N1 = "dalys"
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sort country_id country_encoded
	putexcel N`row' = dalys[`n']
	}
	
////////////////////////////////////////////////////////////////////////////////
//////////////////////// FIGURE 3: WITHIN-COUNTRY REGRESSIONS //////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
DEFINING THE COVARIATES
*******************************************************************************/

gen edbin = .
replace edbin = 0 if inlist(educat3,0,1)
replace edbin = 1 if inlist(educat3,2)

gen byte age_cat2 = .
replace age_cat2 = 1 if inrange(age,30,54.9)
replace age_cat2 = 2 if inrange(age,55,69.9)

/*******************************************************************************
RUNNING THE WITHIN-COUNTRY REGRESSIONS

Remember this is restricted to secondary prevention only and uses w_all weights
*******************************************************************************/

* Education and rural ----------------------------------------------------------
	foreach outcome of varlist statin_sec  	{

		foreach covariate of varlist edbin rural {

			foreach n of numlist 1(1)`=scalar(n_countries)'  {  // >=20 obs, >=1 statin user per group, >= 5 statin users
			
				list country if country_encoded == `n' & country_id == 1
				
				count if samp_sec_statin == 1 & `covariate' != .  & country_encoded == `n'
				scalar count_sample = r(N)
				
				count if `outcome' == 1 & `covariate' != . & country_encoded == `n'
				scalar count_outcome_all = r(N)
				
				count if `outcome' == 1 & `covariate' == 0 & country_encoded == `n'
				scalar count_outcome_0 = r(N)
				
				count if `outcome' == 1 & `covariate' == 1 & country_encoded == `n'
				scalar count_outcome_1 = r(N)
				
				if (`=scalar(count_sample)' >= 5 & `=scalar(count_outcome_all)'>= 5 & `=scalar(count_outcome_0)'>= 1 & `=scalar(count_outcome_1)'>= 1) {
				
					* Sample weights
					gen sample = 1 if `outcome' !=. & `covariate' != . & country_encoded == `n'

					bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
					gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

					* bys country: egen sum_weq_sample_reg=sum(w_all) if sample==1		
					* gen weqnr_sample=w_all/sum_weq_sample
									
					* Set svyset
					svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
					
					* Regression
					svy: poisson `outcome' i.`covariate' i.age_cat i.sex if sample == 1, irr baselevels
					estimates store c_rr_`outcome'_`covariate'
					matrix c_rr_`outcome'_`covariate'_`n' = r(table)
						
					* Marginal effects
					margins, dydx(i.`covariate') vce(unconditional) post baselevels
					estimates store c_dydx_`outcome'_`covariate'
					matrix c_dydx_`outcome'_`covariate'_`n' = r(table)	
					
					* Drop variables for loop
					drop sample wpopnr_sample sum_wpop_sample
				
				}
				
				else { // This creates an empty matrix for countries with an insufficient sample to run the regressions
					di "insufficient sample"
					matrix c_rr_`outcome'_`covariate'_`n' = (.,.)
					matrix c_dydx_`outcome'_`covariate'_`n' = (.,.)
				}	
			}
		}
	}

* This is for sex alone --------------------------------------------------------
	foreach outcome of varlist statin_sec  	{

		foreach covariate of varlist sex { // sex

			foreach n of numlist 1(1)`=scalar(n_countries)'  {  // >=20 obs, >=1 statin user per group, >= 5 statin users
			
				list country if country_encoded == `n' & country_id == 1
				
				count if samp_sec_statin == 1 & `covariate' != .  & country_encoded == `n'
				scalar count_sample = r(N)
				
				count if `outcome' == 1 & `covariate' != . & country_encoded == `n'
				scalar count_outcome_all = r(N)
				
				count if `outcome' == 1 & `covariate' == 0 & country_encoded == `n'
				scalar count_outcome_0 = r(N)
				
				count if `outcome' == 1 & `covariate' == 1 & country_encoded == `n'
				scalar count_outcome_1 = r(N)
				
				if (`=scalar(count_sample)' >= 5 & `=scalar(count_outcome_all)'>= 5 & `=scalar(count_outcome_0)'>= 1 & `=scalar(count_outcome_1)'>= 1) {
								
					* Sample weights
					gen sample = 1 if `outcome' !=. & `covariate' != . & country_encoded == `n'

					bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
					gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

					* bys country: egen sum_weq_sample_reg=sum(w_all) if sample==1		
					* gen weqnr_sample=w_all/sum_weq_sample
					
					* Set svyset
					svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
					
					* Regression
					svy: poisson  `outcome' i.`covariate' i.age_cat if sample == 1, irr baselevels
					estimates store c_rr_`outcome'_`covariate'
					matrix c_rr_`outcome'_`covariate'_`n' = r(table)
						
					* Marginal effects
					margins, dydx(i.`covariate') vce(unconditional) post baselevels
					estimates store c_dydx_`outcome'_`covariate'
					matrix c_dydx_`outcome'_`covariate'_`n' = r(table)	
					
					* Drop variables for loop
					drop sample wpopnr_sample sum_wpop_sample
				
				}
				
				else { // This creates an empty matrix for countries with an insufficient sample to run the regressions
					di "insufficient sample"
					matrix c_rr_`outcome'_`covariate'_`n' = (.,.)
					matrix c_dydx_`outcome'_`covariate'_`n' = (.,.)
				}	
			}
		}
	}

* This is for age --------------------------------------------------------------
	foreach outcome of varlist statin_sec {

		foreach covariate of varlist age_cat2 { // This limits the regressions to countries with 20 denominator AND 5 numerator

			foreach n of numlist 1(1)`=scalar(n_countries)'  {  //
			
				list country if country_encoded == `n' & country_id == 1
				
				count if samp_sec_statin == 1 & `covariate' != .  & country_encoded == `n'
				scalar count_sample = r(N)
				
				count if `outcome' == 1 & `covariate' != . & country_encoded == `n'
				scalar count_outcome_all = r(N)
				
				count if `outcome' == 1 & `covariate' == 1 & country_encoded == `n'
				scalar count_outcome_0 = r(N)
				
				count if `outcome' == 1 & `covariate' == 2 & country_encoded == `n'
				scalar count_outcome_1 = r(N)
				
				if (`=scalar(count_sample)' >= 5 & `=scalar(count_outcome_all)'>= 5 & `=scalar(count_outcome_0)'>= 1 & `=scalar(count_outcome_1)'>= 1) {
				
					* Sample weights
					gen sample = 1 if `outcome' !=. & `covariate' != . & country_encoded == `n'

					bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
					gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

					* bys country: egen sum_weq_sample_reg=sum(w_all) if sample==1		
					* gen weqnr_sample=w_all/sum_weq_sample
					
					* Set svyset
					svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
					
					* Regression
					svy: poisson `outcome' i.`covariate' i.sex if sample == 1, irr baselevels
					estimates store c_rr_`outcome'_`covariate'
					matrix c_rr_`outcome'_`covariate'_`n' = r(table)
						
					* Marginal effects
					margins, dydx(`covariate') vce(unconditional) post baselevels
					estimates store c_dydx_`outcome'_`covariate'
					matrix c_dydx_`outcome'_`covariate'_`n' = r(table)	
					
					* Drop variables for loop
					drop sample wpopnr_sample sum_wpop_sample
				
				}
				
				else { // This creates an empty matrix for countries with an insufficient sample to run the regressions
					di "insufficient sample"
					matrix c_rr_`outcome'_`covariate'_`n' = (.,.)
					matrix c_dydx_`outcome'_`covariate'_`n' = (.,.)
				}		
			}	
		}
	}

/*******************************************************************************
EXPORTING COUNTRY-LEVEL RR AND MARGINS TO EXCEL FOR GRAPHING IN R
*******************************************************************************/

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the country-level regressions. This Excel file is designed to
		subsequently be imported into R and the appropriate graphs generated
		using the "covariate_plots.R" files that I have generated.
	*/

* Secondary prevention ------------------------------------

	sort country_encoded

	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables"
	putexcel set "country covariate plot statin_sec ${date}.xlsx", replace 
		
	* Make the table frame

				   
	* Column: Country
	putexcel A1 = "Country"
	sort country_id country_encoded
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = country[`n']
	}		   
	
	* Column: Income group
	putexcel B1 = "WHOregionclass_string"
	sort country_id country_encoded
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel B`row' = WHOregionclass_string[`n']
	}
	
	* Column: Region
	putexcel C1 = "countryGDPclass_string"
	sort country_id country_encoded
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel C`row' = countryGDPclass_string[`n']
	}
	
	* Column: Ccode
	putexcel D1 = "ccode"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1
	sort country_id country_encoded
	putexcel D`row' = ccode[`n']
	}	
	
	* Column: statin_sec_sex
		
		putexcel E1 = "est_rr_statin_sec_sex"
		putexcel F1 = "lci_rr_statin_sec_sex"
		putexcel G1 = "uci_rr_statin_sec_sex"
		
		putexcel H1 = "est_dydx_statin_sec_sex"
		putexcel I1 = "lci_dydx_statin_sec_sex"
		putexcel J1 = "uci_dydx_statin_sec_sex"
							
		forval n = 1/`=scalar(n_countries)' {
			
				local row = 1+`n'
				di `row'
				
				matlist c_rr_statin_sec_sex_`n'
				matrix results = c_rr_statin_sec_sex_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel E`row' = `b'
				putexcel F`row' = `lci'
				putexcel G`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
			
		forval n = 1/`=scalar(n_countries)' {
				
				local row = 1+`n'
				di `row'
				
				matlist c_dydx_statin_sec_sex_`n'
				matrix results = c_dydx_statin_sec_sex_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel H`row' = `b'
				putexcel I`row' = `lci'
				putexcel J`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
	
	* Column: statin_rural
		
		putexcel K1 = "est_rr_statin_sec_rural"
		putexcel L1 = "lci_rr_statin_sec_rural"
		putexcel M1 = "uci_rr_statin_sec_rural"
		
		putexcel N1 = "est_dydx_statin_sec_rural"
		putexcel O1 = "lci_dydx_statin_sec_rural"
		putexcel P1 = "uci_dydx_statin_sec_rural"
							
		forval n = 1/`=scalar(n_countries)' {
			
				local row = 1+`n'
				di `row'
				
				matlist c_rr_statin_sec_rural_`n'
				matrix results = c_rr_statin_sec_rural_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel K`row' = `b'
				putexcel L`row' = `lci'
				putexcel M`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
			
		forval n = 1/`=scalar(n_countries)' {
				
				local row = 1+`n'
				di `row'
				
				matlist c_dydx_statin_sec_rural_`n'
				matrix results = c_dydx_statin_sec_rural_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel N`row' = `b'
				putexcel O`row' = `lci'
				putexcel P`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
	
	* Column: statin_edbin
		
		putexcel Q1 = "est_rr_statin_sec_edbin"
		putexcel R1 = "lci_rr_statin_sec_edbin"
		putexcel S1 = "uci_rr_statin_sec_edbin"
		
		putexcel T1 = "est_dydx_statin_sec_edbin"
		putexcel U1 = "lci_dydx_statin_sec_edbin"
		putexcel V1 = "uci_dydx_statin_sec_edbin"
							
		forval n = 1/`=scalar(n_countries)' {
			
				local row = 1+`n'
				di `row'
				
				matlist c_rr_statin_sec_edbin_`n'
				matrix results = c_rr_statin_sec_edbin_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel Q`row' = `b'
				putexcel R`row' = `lci'
				putexcel S`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
			
		forval n = 1/`=scalar(n_countries)' {
				
				local row = 1+`n'
				di `row'
				
				matlist c_dydx_statin_sec_edbin_`n'
				matrix results = c_dydx_statin_sec_edbin_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel T`row' = `b'
				putexcel U`row' = `lci'
				putexcel V`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
	
	* Column: statin_age_cat2
		
		putexcel W1 = "est_rr_statin_sec_age_cat2"
		putexcel X1 = "lci_rr_statin_sec_age_cat2"
		putexcel Y1 = "uci_rr_statin_sec_age_cat2"
		
		putexcel Z1 = "est_dydx_statin_sec_age_cat2"
		putexcel AA1 = "lci_dydx_statin_sec_age_cat2"
		putexcel AB1 = "uci_dydx_statin_sec_age_cat2"
							
		forval n = 1/`=scalar(n_countries)' {
			
				local row = 1+`n'
				di `row'
				
				matlist c_rr_statin_sec_age_cat2_`n'
				matrix results = c_rr_statin_sec_age_cat2_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel W`row' = `b'
				putexcel X`row' = `lci'
				putexcel Y`row' = `uci'
				
				local row = `row'+1
				di `row'
			}
			
		forval n = 1/`=scalar(n_countries)' {
				
				local row = 1+`n'
				di `row'
				
				matlist c_dydx_statin_sec_age_cat2_`n'
				matrix results = c_dydx_statin_sec_age_cat2_`n'
				
				local b = results[1,2]
				local lci = results[5,2]
				local uci = results[6,2]
				
				putexcel Z`row' = `b'
				putexcel AA`row' = `lci'
				putexcel AB`row' = `uci'
				
				local row = `row'+1
				di `row'
			}

/*******************************************************************************
GENERATING REGRESSION COEFFICIENTS AND R2 FOR COUNTRY-LEVEL REGRESSIONS
*******************************************************************************/

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the country-level regressions. This Excel file is designed to
		subsequently be imported into R and the appropriate graphs generated
		using the "covariate_plots.R" files that I have generated.
	*/	
	
frame create regressioncoefficients
frame change regressioncoefficients
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables"

import excel "country plot data ${date}.xlsx", sheet("Sheet1") firstrow clear

* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables"
	putexcel set "Standardized regression sentences ${date}.xlsx", replace 
		
	* Make the table frame
	putexcel A1 = "country_factor"
	putexcel A2 = "health_spend"
	putexcel A3 = "gni"
	putexcel A4 = "policy_score"
	putexcel A5 = "dalys"
	putexcel B1 = "text"
	putexcel C1 = "r2 primary"
	putexcel D1 = "r2 secondary"

/* See this nice link for manual calculation of standardized beta coefficients with CIs: https://www.statalist.org/forums/forum/general-stata-discussion/general/1430632-sem-or-95-ci-for-x-standardized-coefficient	*/

local n = 1

foreach var of varlist health_spend gni policy_score dalys {

local n = `n' + 1

* PRIMARY

	* test drive beta regression
	regress c.est_pri c.`var', beta

	* generate standardized values
	egen y_pri = std(est_pri) if e(sample)
	egen x_pri = std(`var') if e(sample)

	* regress to manually generate betas
	regress c.y_pri c.x_pri
	matlist r(table) 
	matrix temp_matrix = r(table)

	local beta_pri =string(temp_matrix[1,1],"%3.2f")
	di "`beta_pri'"

	local lci_pri =string(temp_matrix[5,1],"%3.2f")
	di "`lci_pri'"

	local uci_pri =string(temp_matrix[6,1],"%3.2f")
	di "`uci_pri'"

	local r2_pri = e(r2)
	di `r2_pri'
	
	
* SECONDARY

	* test drive beta regression
	regress c.est_sec c.`var', beta

	* generate standardized values
	egen y_sec = std(est_sec) if e(sample)
	egen x_sec = std(`var') if e(sample)

	* regress to manually generate betas
	regress c.y_sec c.x_sec
	matlist r(table) 
	matrix temp_matrix = r(table)

	local beta_sec =string(temp_matrix[1,1],"%3.2f")
	di "`beta_sec'"

	local lci_sec =string(temp_matrix[5,1],"%3.2f")
	di "`lci_sec'"

	local uci_sec =string(temp_matrix[6,1],"%3.2f")
	di "`uci_sec'"

	local r2_sec = e(r2)
	di `r2_sec'
	
putexcel B`n' = "The standardized regression coefficients were `beta_pri' (95% CI, `lci_pri' to `uci_pri') for primary prevention and `beta_sec' (95% CI, `lci_sec' to `uci_sec') for secondary prevention."
putexcel C`n' = `r2_pri'
putexcel D`n' = `r2_sec'

drop y_pri x_pri y_sec x_sec

}

frame change default
frame drop regressioncoefficients			
			
////////////////////////////////////////////////////////////////////////////////
//////////////////////// RANDOM OTHER CALCULATIONS /////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
SUMMARY ROW IN TABLE 1 SURVEY CHARACTERISTICS
*******************************************************************************/


frame create table_overall
frame change table_overall

import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/Table survey characteristics ${date}.xlsx", sheet("Sheet1") firstrow clear

	* Response rate
	destring Responserate, gen(response_rate_num) force
	sum response_rate_num, d
	
	* Proportion female
	sum Proportionfemale, d

	* Median age range
	sum Proportionfemale, d
	
	gen median_age = substr(MedianageIQRyears,1,2)
	destring median_age, replace
	sum median_age, d

frame change default	
frame drop table_overall
	
/*******************************************************************************
POPULATION STUFF
*******************************************************************************/

* Calculating the proportion of represented population available in the pooled regression

gen byte rural_pop = 1 if country_id == 1
replace rural_pop = 0 if country_id == 1 & rural == .

list country rural_pop if country_id == 1

gen population_2019_40_69_mill = population_2019_40_69/1000000

tabstat population_2019_40_69_mill if country_id == 1 & rural_pop == 1, s(sum)
tabstat population_2019_40_69_mill if country_id == 1 & rural_pop == 0, s(sum)

di 385.2411/(48.91052+385.2411) // included rural countries still about 90% of underlying population in included countries

* Calculating the total population of countries included in the analysis
gen population_2019_all_ages_mill = population_2019_all_ages/1000000
tabstat population_2019_all_ages_mill if country_id == 1 & rural_pop == 1, s(sum)

	// so the surveys are representative of countries with a total population of 1 billion people

////////////////////////////////////////////////////////////////////////////////
///////////// TABLE 2: SHOWING POOLED RR AND MARGINS TOGETHER WPOP /////////////
////////////////////////////////////////////////////////////////////////////////

/* This code just puts into a table the estimates that have already been made
into a forestplot above. */

* Secondary prevention ------------------------------------

	sort country_encoded

	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables"
	putexcel set "Main table - pooled rr and margins ${date}.xlsx", replace 
		
	* Make the table frame
	putexcel B1 = "Risk ratio in statin use"
	putexcel C1 = "Absolute difference in statin use (%)"
	putexcel D1 = "P value"
	
	putexcel A2 = "Primary prevention"
	putexcel A3 = "Age"
	putexcel A4 = "<50 years"
	putexcel A5 = "50-59 years"
	putexcel A6 = "60-69 years"
	putexcel A7 = "Sex"
	putexcel A8 = "Male"
	putexcel A9 = "Female"
	putexcel A10 = "Education"
	putexcel A11 = "No schooling"
	putexcel A12 = "Primary "
	putexcel A13 = "≥ Secondary "
	putexcel A14 = "Rural residence"
	putexcel A15 = "Urban"
	putexcel A16 = "Rural"

	putexcel A17 = "Primary prevention"
	putexcel A18 = "Age"
	putexcel A19 = "<50 years"
	putexcel A20 = "50-59 years"
	putexcel A21 = "60-69 years"
	putexcel A22 = "Sex"
	putexcel A23 = "Male"
	putexcel A24 = "Female"
	putexcel A25 = "Education"
	putexcel A26 = "No schooling"
	putexcel A27 = "Primary "
	putexcel A28 = "≥ Secondary "
	putexcel A29 = "Rural residence"
	putexcel A30 = "Urban"
	putexcel A31 = "Rural"
	
	putexcel B4 = "1 (ref)"
	putexcel B8 = "1 (ref)"
	putexcel B11 = "1 (ref)"
	putexcel B15 = "1 (ref)"
	putexcel B19 = "1 (ref)"
	putexcel B23 = "1 (ref)"
	putexcel B26 = "1 (ref)"
	putexcel B30 = "1 (ref)"
	
	putexcel C4 = "0 (ref)"
	putexcel C8 = "0 (ref)"
	putexcel C11 = "0 (ref)"
	putexcel C15 = "0 (ref)"
	putexcel C19 = "0 (ref)"
	putexcel C23 = "0 (ref)"
	putexcel C26 = "0 (ref)"
	putexcel C30 = "0 (ref)"
	
	* Primary prevention
	
	* Risk ratios
	
	matlist wpop_pois_statin_pri_lab
	matrix results = wpop_pois_statin_pri_lab
	
		* Age 50-59 years
			
			* Estimates
			local b = string(results[1,2],"%9.2f")
			local lci = string(results[5,2],"%9.2f")
			local uci = string(results[6,2],"%9.2f")
				
			putexcel B5 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,2],"%9.3f")
							
			if results[4,2] <0.0001 {
				putexcel D5 = "<0.0001"
			}
			
			else {
				putexcel D5 = `p'
			}	
	
		* Age 60-69 years
			
			* Estimates
			local b = string(results[1,3],"%9.2f")
			local lci = string(results[5,3],"%9.2f")
			local uci = string(results[6,3],"%9.2f")
				
			putexcel B6 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,3],"%9.3f")
							
			if results[4,3] <0.0001 {
				putexcel D6 = "<0.0001"
			}
			
			else {
				putexcel D6 = `p'
			}	
	
		* Sex female
			
			* Estimates
			local b = string(results[1,5],"%9.2f")
			local lci = string(results[5,5],"%9.2f")
			local uci = string(results[6,5],"%9.2f")
				
			putexcel B9 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,5],"%9.3f")
							
			if results[4,5] <0.0001 {
				putexcel D9 = "<0.0001"
			}
			
			else {
				putexcel D9 = `p'
			}
		
		* Education Primary 
			
			* Estimates
			local b = string(results[1,7],"%9.2f")
			local lci = string(results[5,7],"%9.2f")
			local uci = string(results[6,7],"%9.2f")
				
			putexcel B12 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,7],"%9.3f")
							
			if results[4,7] <0.0001 {
				putexcel D12 = "<0.0001"
			}
			
			else {
				putexcel D12 = `p'
			}	
	
		* Education ≥ Secondary  
			
			* Estimates
			local b = string(results[1,8],"%9.2f")
			local lci = string(results[5,8],"%9.2f")
			local uci = string(results[6,8],"%9.2f")
				
			putexcel B13 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,8],"%9.3f")
							
			if results[4,8] <0.0001 {
				putexcel D13 = "<0.0001"
			}
			
			else {
				putexcel D13 = `p'
			}	
			
		* Rural
			
			* Estimates
			local b = string(results[1,10],"%9.2f")
			local lci = string(results[5,10],"%9.2f")
			local uci = string(results[6,10],"%9.2f")
				
			putexcel B16 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,10],"%9.3f")
							
			if results[4,10] <0.0001 {
				putexcel D16 = "<0.0001"
			}
			
			else {
				putexcel D16 = `p'
			}
			
	matlist wpop_pois_statin_sec
	matrix results = wpop_pois_statin_sec
	
		* Age 50-59 years
			
			* Estimates
			local b = string(results[1,2],"%9.2f")
			local lci = string(results[5,2],"%9.2f")
			local uci = string(results[6,2],"%9.2f")
				
			putexcel B20 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,2],"%9.3f")
							
			if results[4,2] <0.0001 {
				putexcel D20 = "<0.0001"
			}
			
			else {
				putexcel D20 = `p'
			}	
	
		* Age 60-69 years
			
			* Estimates
			local b = string(results[1,3],"%9.2f")
			local lci = string(results[5,3],"%9.2f")
			local uci = string(results[6,3],"%9.2f")
				
			putexcel B21 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,3],"%9.3f")
							
			if results[4,3] <0.0001 {
				putexcel D21 = "<0.0001"
			}
			
			else {
				putexcel D21 = `p'
			}	
	
		* Sex female
			
			* Estimates
			local b = string(results[1,5],"%9.2f")
			local lci = string(results[5,5],"%9.2f")
			local uci = string(results[6,5],"%9.2f")
				
			putexcel B24 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,5],"%9.3f")
							
			if results[4,5] <0.0001 {
				putexcel D24 = "<0.0001"
			}
			
			else {
				putexcel D24 = `p'
			}
		
		* Education Primary 
			
			* Estimates
			local b = string(results[1,7],"%9.2f")
			local lci = string(results[5,7],"%9.2f")
			local uci = string(results[6,7],"%9.2f")
				
			putexcel B27 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,7],"%9.3f")
							
			if results[4,7] <0.0001 {
				putexcel D27 = "<0.0001"
			}
			
			else {
				putexcel D27 = `p'
			}	
			di `p'
	
		* Education ≥ Secondary  
			
			* Estimates
			local b = string(results[1,8],"%9.2f")
			local lci = string(results[5,8],"%9.2f")
			local uci = string(results[6,8],"%9.2f")
				
			putexcel B28 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,8],"%9.3f")
							
			if results[4,8] <0.0001 {
				putexcel D28 = "<0.0001"
			}
			
			else {
				putexcel D28 = `p'
			}	
			
		* Rural
			
			* Estimates
			local b = string(results[1,10],"%9.2f")
			local lci = string(results[5,10],"%9.2f")
			local uci = string(results[6,10],"%9.2f")
				
			putexcel B31 = "`b' (`lci' to `uci')"
			
			* P value
			local p = string(results[4,10],"%9.3f")
							
			if results[4,10] <0.0001 {
				putexcel D31 = "<0.0001"
			}
			
			else {
				putexcel D31 = `p'
			}
			
* Margins
	
	matlist wpop_margins_statin_pri_lab
	matrix results = wpop_margins_statin_pri_lab
	
		* Age 50-59 years
			
			* Estimates
			local b = string(results[1,2]*100,"%9.1f")
			local lci = string(results[5,2]*100,"%9.1f")
			local uci = string(results[6,2]*100,"%9.1f")
				
			putexcel C5 = "`b' (`lci' to `uci')"
	
		* Age 60-69 years
			
			* Estimates
			local b = string(results[1,3]*100,"%9.1f")
			local lci = string(results[5,3]*100,"%9.1f")
			local uci = string(results[6,3]*100,"%9.1f")
				
			putexcel C6 = "`b' (`lci' to `uci')"
	
		* Sex female
			
			* Estimates
			local b = string(results[1,5]*100,"%9.1f")
			local lci = string(results[5,5]*100,"%9.1f")
			local uci = string(results[6,5]*100,"%9.1f")
				
			putexcel C9 = "`b' (`lci' to `uci')"

		* Education Primary 
			
			* Estimates
			local b = string(results[1,7]*100,"%9.1f")
			local lci = string(results[5,7]*100,"%9.1f")
			local uci = string(results[6,7]*100,"%9.1f")
				
			putexcel C12 = "`b' (`lci' to `uci')"
	
		* Education ≥ Secondary  
			
			* Estimates
			local b = string(results[1,8]*100,"%9.1f")
			local lci = string(results[5,8]*100,"%9.1f")
			local uci = string(results[6,8]*100,"%9.1f")
				
			putexcel C13 = "`b' (`lci' to `uci')"
			
		* Rural
			
			* Estimates
			local b = string(results[1,10]*100,"%9.1f")
			local lci = string(results[5,10]*100,"%9.1f")
			local uci = string(results[6,10]*100,"%9.1f")
				
			putexcel C16 = "`b' (`lci' to `uci')"
			
	matlist wpop_margins_statin_sec
	matrix results = wpop_margins_statin_sec
	
		* Age 50-59 years
			
			* Estimates
			local b = string(results[1,2]*100,"%9.1f")
			local lci = string(results[5,2]*100,"%9.1f")
			local uci = string(results[6,2]*100,"%9.1f")
				
			putexcel C20 = "`b' (`lci' to `uci')"
	
		* Age 60-69 years
			
			* Estimates
			local b = string(results[1,3]*100,"%9.1f")
			local lci = string(results[5,3]*100,"%9.1f")
			local uci = string(results[6,3]*100,"%9.1f")
				
			putexcel C21 = "`b' (`lci' to `uci')"
	
		* Sex female
			
			* Estimates
			local b = string(results[1,5]*100,"%9.1f")
			local lci = string(results[5,5]*100,"%9.1f")
			local uci = string(results[6,5]*100,"%9.1f")
				
			putexcel C24 = "`b' (`lci' to `uci')"
		
		* Education Primary 
			
			* Estimates
			local b = string(results[1,7]*100,"%9.1f")
			local lci = string(results[5,7]*100,"%9.1f")
			local uci = string(results[6,7]*100,"%9.1f")
				
			putexcel C27 = "`b' (`lci' to `uci')"
	
		* Education ≥ Secondary  
			
			* Estimates
			local b = string(results[1,8]*100,"%9.1f")
			local lci = string(results[5,8]*100,"%9.1f")
			local uci = string(results[6,8]*100,"%9.1f")
				
			putexcel C28 = "`b' (`lci' to `uci')"
			
		* Rural
			
			* Estimates
			local b = string(results[1,10]*100,"%9.1f")
			local lci = string(results[5,10]*100,"%9.1f")
			local uci = string(results[6,10]*100,"%9.1f")
				
			putexcel C31 = "`b' (`lci' to `uci')"

////////////////////////////////////////////////////////////////////////////////
////////// SENSITIVITY ANALYSIS WITHOUT UNIVERSAL DIABETES INDICATION //////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
GENERATING THE SAMPLE OF PARTICIPANTS
******************************************************************************/		

* Primary prevention denominator
gen byte samp_statin_nodm = 1 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & risk_above_20_lab == 1
replace samp_statin_nodm = 0 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & risk_above_20_lab == 0
tab country samp_statin_nodm
tab country samp_statin_nodm, m

/*******************************************************************************
GENERATING THE OUTCOMES
******************************************************************************/		

* Primary CVD prevention
gen byte statin_nodm = .
replace statin_nodm = 0 if samp_statin_nodm == 1 & statin == 0
replace statin_nodm = 1 if samp_statin_nodm == 1 & statin == 1
tab statin_nodm

/*******************************************************************************
ESTIMATIONS AND REGRESSION

This is the same exact same code as in the primary analysis, but using the different
outcomes defined above.
******************************************************************************/	

* Estimating proportions

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_nodm samp_statin_nodm {

		* Sample weights
		gen sample = 1 if `var' != .

		bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		* gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Estimates overall output
		svy, subpop(sample): proportion `var'
		estimates store wpop_ov_`var'
		matrix wpop_ov_`var' = r(table)
		
		* Estimates by country
		svy, subpop(sample): proportion `var', over(country_encoded)
		estimates store wpop_c_`var'
		matrix wpop_c_`var' = r(table)
		
		* Estimates by country group
		svy, subpop(sample): proportion `var', over(WHOregionclass)
		estimates store wpop_r_`var'
		matrix wpop_r_`var' = r(table)
		
		svy, subpop(sample): proportion `var', over(countryGDPclass)
		estimates store wpop_i_`var'
		matrix wpop_i_`var' = r(table)
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}

* Multivariable model

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_nodm {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

		bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		* gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
		estimates store wpop_pois_`var'
		matrix wpop_pois_`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
		estimates store wpop_margins_`var'
		matrix wpop_margins_`var' = r(table)	
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}

/*******************************************************************************
SEPARATE WPOP FORESTPLOT BY REGION, INCOME GROUP, AND OVERALL - NODM SENSITIVITY
*******************************************************************************/	

foreach var in nodm {
	
* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop forestplot_regiongdp `var' ${date}.xlsx", replace 

putexcel A1 = "category"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"

putexcel A2 = "Region"
putexcel A3 = "Africa"
putexcel A4 = "Americas"
putexcel A5 = "South East Asia"
putexcel A6 = "Western Pacific"
putexcel A7 = "Europe"
putexcel A8 = "Eastern Mediterranean"

putexcel A10 = "Income group"
putexcel A11 = "Low income"
putexcel A12 = "Lower middle"
putexcel A13 = "Upper middle"

putexcel A15 = "Overall"

// Region
matlist wpop_r_statin_`var'
matrix results = wpop_r_statin_`var'

	* est
	putexcel B3 = results[1,7]
	putexcel B4 = results[1,8]
	putexcel B5 = results[1,9]
	putexcel B6 = results[1,10]
	putexcel B7 = results[1,11]
	putexcel B8 = results[1,12]
	putexcel B9 = results[1,13]

	* lci
	putexcel C3 = results[5,7]
	putexcel C4 = results[5,8]
	putexcel C5 = results[5,9]
	putexcel C6 = results[5,10]
	putexcel C7 = results[5,11]
	putexcel C8 = results[5,12]
	putexcel C9 = results[5,13]

	* uci
	putexcel D3 = results[6,7]
	putexcel D4 = results[6,8]
	putexcel D5 = results[6,9]
	putexcel D6 = results[6,10]
	putexcel D7 = results[6,11]
	putexcel D8 = results[6,12]
	putexcel D9 = results[6,13]

// GDP categories
matlist wpop_i_statin_`var'
matrix results = wpop_i_statin_`var'

	* est
	putexcel B11 = results[1,4]
	putexcel B12 = results[1,5]
	putexcel B13 = results[1,6]
	putexcel B14 = results[1,7]

	* lci
	putexcel C11 = results[5,4]
	putexcel C12 = results[5,5]
	putexcel C13 = results[5,6]
	putexcel C14 = results[5,7]

	* uci
	putexcel D11 = results[6,4]
	putexcel D12 = results[6,5]
	putexcel D13 = results[6,6]
	putexcel D14 = results[6,7]
	
// Overall
matlist wpop_ov_statin_`var'
matrix results = wpop_ov_statin_`var'

	* est
	putexcel B15 = results[1,2]
	
	* lci
	putexcel C15 = results[5,2]
	
	* uci
	putexcel D15 = results[6,2]
	
frame create wpop_forestplot_regiongdp
frame change wpop_forestplot_regiongdp	
	
import excel "Putexcel tables/wpop forestplot_regiongdp `var' ${date}.xlsx", sheet("Sheet1") cellrange(A1:D15) firstrow clear
	
* Converting to %	
replace est = est*100
replace lci = lci*100
replace uci = uci*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"Income group","Region")
replace _USE = 6 if missing(category)
replace _USE = 5 if category == "Overall"

* Making string variables	
gen est_s = string(est,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "0 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0
replace category = `"{bf:Overall}"' if category == "Overall"

label var output `"`"{bf:Proportion using}"' `"{bf:statins (%)}"'"'	

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of very helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force


forestplot est lci uci, ///
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output) /// this tells forestplot which columsn to display
	range(0 60) /// range of the plot
	xlabel(0 20 40 60) /// plot labels
	ylabel(,grid) ///
	astext(60) ///
	xsize(4) /// % text in the plot
	ysize(3) ///
	spacing(1.5) /// this refers to spacing by line
	boxscale(80)  ///
	boxopts(mcolor(black)) ///
	pointopts(msymbol(square)) ///
	diamopts(color(black)) ///
	olineopts(lcolor(none)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	null(0) ///
	xtitle("Statin use (%)", size(3.5rs)  margin(l=0 r=0 b=2rs t=2rs)) ///
	name(wpop_forest_regdp_statin_`var',replace) //
		
* Fixing some minor formatting issues
gr_edit .plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy

* Making the line
gr_edit .plotregion1.AddLine added_lines editor 50.57696737239915 .0341909628416472 50.57696737239915 14.75215405672569
gr_edit .plotregion1.added_lines_new = 1
gr_edit .plotregion1.added_lines_rec = 1
gr_edit .plotregion1.added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.1) allow_pct(1)) color(black) pattern(dash) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy

* Adding text
gr_edit .plotregion1.AddTextBox added_text editor 15.32558119025364 42.70517571788388
gr_edit .plotregion1.added_text_new = 1
gr_edit .plotregion1.added_text_rec = 1
gr_edit .plotregion1.added_text[1].style.editstyle  angle(default) size( sztype(relative) val(3.4722) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(gr_edit relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( gr_edit sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit .plotregion1.added_text[1].style.editstyle size(2.4) editcopy
gr_edit .plotregion1.added_text[1].text = {}
gr_edit .plotregion1.added_text[1].text.Arrpush {it:WHO target}

* Dragging text
gr_edit .plotregion1.added_text[1].DragBy .2389279723033106 -1

	graph save "Figures/wpop_forest_regiongdp statin `var' ${date}.gph", replace         
	graph export "Figures/wpop_forest_regiongdp statin `var' ${date}.pdf", as(pdf) name(wpop_forest_regdp_statin_`var') replace
	
	frame change default
	frame drop wpop_forestplot_regiongdp
}

/*******************************************************************************
WPOP FORESTPLOT - NODM SENSITIVITY
*******************************************************************************/

foreach var in nodm {

* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", replace 

estimates restore wpop_pois_statin_`var' //  Restoring estimates	
estimates replay wpop_pois_statin_`var', baselevels // irr

putexcel (A1) = etable	// exporting the whole regression table

estimates restore wpop_margins_statin_`var' //  Restoring estimates	
estimates replay wpop_margins_statin_`var', baselevels

putexcel (H1) = etable	// exporting the whole regression table

frame create wpop_main_forestplot
frame change wpop_main_forestplot	
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:N19) firstrow clear
	
* Renaming/dropping variables
	
	* Risk ratios
	drop StdErr
	drop t // t value
	rename Coef est_rr // IRR
	rename statin_`var' category
	rename F lci_rr
	rename G uci_rr
	rename Pt pvalue_rr

	* Margins
	rename dydx est_dydx
	drop H // repeat titles
	drop J // standard errors
	drop K // t value
	drop L // margins p-value (use from RR instead)
	rename M lci_dydx
	rename N uci_dydx
		
* Converting to % for margins
replace est_dydx = est_dydx*100
replace lci_dydx = lci_dydx*100
replace uci_dydx = uci_dydx*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Exponentiating
gen est_rr_exp = exp(est_rr)
gen lci_rr_exp = exp(lci_rr)
gen uci_rr_exp = exp(uci_rr)

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue_rr <0.0001
replace pvalue_s = "" if pvalue_rr == .

gen est_rr_s = string(est_rr_exp,"%9.2f")
gen lci_rr_s = string(lci_rr_exp,"%9.2f")
gen uci_rr_s = string(uci_rr_exp,"%9.2f")

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")

* Generating columns
gen column_rr = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace column_rr = "1 (ref)" if est_rr == 0 | est_rr == .
replace column_rr = "" if est_rr == .

gen column_dydx = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace column_dydx = "0 (ref)" if est_dydx == 0 | est_dydx == .
replace column_dydx = "" if est_dydx == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var column_rr `"`"{bf:Risk ratio               }"' `"{bf:in statin use}"'"'
label var column_dydx `"`"{bf:Absolute difference}"' `"{bf:in statin use (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est_rr lci_rr uci_rr, ///
	eform /// exponentiate
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(column_rr column_dydx pvalue_s) /// this tells forestplot which columsn to display
	range(0.25 4) /// range of the plot
		xlabel(0.25 0.5 1 2 4) /// plot labels
	astext(80) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.5) ///
	xtitle("Risk ratio", size(3.2rs)  margin(l=0 r=58rs b=2rs t=2rs)) ///
	name(wpop_main_forest_`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/wpop_main_forest_`var'_ ${date}.gph", replace         
	graph export "Figures/wpop_main_forest_`var'_ ${date}.pdf", as(pdf) name(wpop_main_forest_`var') replace
		
frame change default
frame drop wpop_main_forestplot
	
}
			
////////////////////////////////////////////////////////////////////////////////
//////////////// SENSITIVITY ANALYSES USING 2007 WHO/ISH SCORES ////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
GENERATING THE RISK CATEGORIES
******************************************************************************/	

	/*
		codebook who2007cvdrisk
		
			151,702       1  <10%
			6,647         2  10% to <20%
			3,037         3  20% to <30%
			1,426         4  30% to <40%
			1,785         5  >=40%
			74,252        .  
	*/

gen risk_above_30_ish =.
replace risk_above_30_ish = 1 if inlist(who2007cvdrisk,4,5) // this is actually >=30%
replace risk_above_30_ish = 0 if inlist(who2007cvdrisk,1,2,3)
replace risk_above_30_ish = . if missing(who2007cvdrisk)

tab country risk_above_30_ish if inrange(age,40,69), missing

/*******************************************************************************
GENERATING THE SAMPLE OF PARTICIPANTS
******************************************************************************/		
	
* Primary prevention denominator
gen byte samp_ish_statin = 1 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & (risk_above_30_ish == 1 | (hbg_new == 1 & clin_dia == 1))
replace samp_ish_statin = 0 if inrange(age,40,69) & pregnant != 1 & mi == 0 & statin != . & (risk_above_30_ish == 0 & (hbg_new == 0 | clin_dia == 0))
tab country samp_ish_statin
tab country samp_ish_statin, m

/*******************************************************************************
GENERATING THE OUTCOMES
******************************************************************************/		

* Primary CVD prevention
gen byte statin_ish = .
replace statin_ish = 0 if samp_ish == 1 & statin == 0
replace statin_ish = 1 if samp_ish == 1 & statin == 1
tab statin_ish

* Among people using meds -> see who2007cvdrisk categories above
gen statin_den_ish = .
replace statin_den_ish = 1 if mi == 1 & statin == 1 & inrange(age,18,69) & pregnant != 1
replace statin_den_ish = 2 if mi == 0 & statin == 1 & inlist(who2007cvdrisk,3,4,5) & inrange(age,18,69) & pregnant != 1 //
replace statin_den_ish = 3 if mi == 0 & statin == 1 & inlist(who2007cvdrisk,2) & inrange(age,18,69) & pregnant != 1
replace statin_den_ish = 4 if mi == 0 & statin == 1 & inlist(who2007cvdrisk,1) & inrange(age,18,69) & pregnant != 1
replace statin_den_ish = . if country == "Iraq" // Take care here -> Iraq CVD med question was conditional on reporting an MI/stroke
tab country statin_den_ish

/*******************************************************************************
ESTIMATIONS AND REGRESSION

This is the same exact same code as in the primary analysis, but using the different
outcomes defined above.
******************************************************************************/	

* Estimating proportions

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_ish {

		* Sample weights
		gen sample = 1 if `var' != .

		bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		* gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Estimates overall output
		svy, subpop(sample): proportion `var'
		estimates store wpop_ov_`var'
		matrix wpop_ov_`var' = r(table)
		
		* Estimates by country
		svy, subpop(sample): proportion `var', over(country_encoded)
		estimates store wpop_c_`var'
		matrix wpop_c_`var' = r(table)
		
		* Estimates by country group
		svy, subpop(sample): proportion `var', over(WHOregionclass)
		estimates store wpop_r_`var'
		matrix wpop_r_`var' = r(table)
		
		svy, subpop(sample): proportion `var', over(countryGDPclass)
		estimates store wpop_i_`var'
		matrix wpop_i_`var' = r(table)
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}

* Multivariable model

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_ish {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

		bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		* gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
		estimates store wpop_pois_`var'
		matrix wpop_pois_`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
		estimates store wpop_margins_`var'
		matrix wpop_margins_`var' = r(table)	
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}

/*******************************************************************************
SEPARATE WPOP FORESTPLOT BY REGION, INCOME GROUP, AND OVERALL - ISH SENSITIVITY
*******************************************************************************/	

foreach var in ish {
	
* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop forestplot_regiongdp `var' ${date}.xlsx", replace 

putexcel A1 = "category"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"

putexcel A2 = "Region"
putexcel A3 = "Africa"
putexcel A4 = "Americas"
putexcel A5 = "South East Asia"
putexcel A6 = "Western Pacific"
putexcel A7 = "Europe"
putexcel A8 = "Eastern Mediterranean"

putexcel A10 = "Income group"
putexcel A11 = "Low income"
putexcel A12 = "Lower middle"
putexcel A13 = "Upper middle"

putexcel A15 = "Overall"

// Region
matlist wpop_r_statin_`var'
matrix results = wpop_r_statin_`var'

	* est
	putexcel B3 = results[1,7]
	putexcel B4 = results[1,8]
	putexcel B5 = results[1,9]
	putexcel B6 = results[1,10]
	putexcel B7 = results[1,11]
	putexcel B8 = results[1,12]
	putexcel B9 = results[1,13]

	* lci
	putexcel C3 = results[5,7]
	putexcel C4 = results[5,8]
	putexcel C5 = results[5,9]
	putexcel C6 = results[5,10]
	putexcel C7 = results[5,11]
	putexcel C8 = results[5,12]
	putexcel C9 = results[5,13]

	* uci
	putexcel D3 = results[6,7]
	putexcel D4 = results[6,8]
	putexcel D5 = results[6,9]
	putexcel D6 = results[6,10]
	putexcel D7 = results[6,11]
	putexcel D8 = results[6,12]
	putexcel D9 = results[6,13]

// GDP categories
matlist wpop_i_statin_`var'
matrix results = wpop_i_statin_`var'

	* est
	putexcel B11 = results[1,4]
	putexcel B12 = results[1,5]
	putexcel B13 = results[1,6]
	putexcel B14 = results[1,7]

	* lci
	putexcel C11 = results[5,4]
	putexcel C12 = results[5,5]
	putexcel C13 = results[5,6]
	putexcel C14 = results[5,7]

	* uci
	putexcel D11 = results[6,4]
	putexcel D12 = results[6,5]
	putexcel D13 = results[6,6]
	putexcel D14 = results[6,7]
	
// Overall
matlist wpop_ov_statin_`var'
matrix results = wpop_ov_statin_`var'

	* est
	putexcel B15 = results[1,2]
	
	* lci
	putexcel C15 = results[5,2]
	
	* uci
	putexcel D15 = results[6,2]
	
frame create wpop_forestplot_regiongdp
frame change wpop_forestplot_regiongdp	
	
import excel "Putexcel tables/wpop forestplot_regiongdp `var' ${date}.xlsx", sheet("Sheet1") cellrange(A1:D15) firstrow clear
	
* Converting to %	
replace est = est*100
replace lci = lci*100
replace uci = uci*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"Income group","Region")
replace _USE = 6 if missing(category)
replace _USE = 5 if category == "Overall"

* Making string variables	
gen est_s = string(est,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "0 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0
replace category = `"{bf:Overall}"' if category == "Overall"

label var output `"`"{bf:Proportion using}"' `"{bf:statins (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of very helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force


forestplot est lci uci, ///
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output) /// this tells forestplot which columsn to display
	range(0 60) /// range of the plot
	xlabel(0 20 40 60) /// plot labels
	ylabel(,grid) ///
	astext(60) ///
	xsize(4) /// % text in the plot
	ysize(3) ///
	spacing(1.5) /// this refers to spacing by line
	boxscale(80)  ///
	boxopts(mcolor(black)) ///
	pointopts(msymbol(square)) ///
	diamopts(color(black)) ///
	olineopts(lcolor(none)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	null(0) ///
	xtitle("Statin use (%)", size(3.5rs)  margin(l=0 r=0 b=2rs t=2rs)) ///
	name(wpop_forest_regdp_statin_`var',replace) //
		
* Fixing some minor formatting issues
gr_edit .plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy

* Making the line
gr_edit .plotregion1.AddLine added_lines editor 50.57696737239915 .0341909628416472 50.57696737239915 14.75215405672569
gr_edit .plotregion1.added_lines_new = 1
gr_edit .plotregion1.added_lines_rec = 1
gr_edit .plotregion1.added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.1) allow_pct(1)) color(black) pattern(dash) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy

* Adding text
gr_edit .plotregion1.AddTextBox added_text editor 15.32558119025364 42.70517571788388
gr_edit .plotregion1.added_text_new = 1
gr_edit .plotregion1.added_text_rec = 1
gr_edit .plotregion1.added_text[1].style.editstyle  angle(default) size( sztype(relative) val(3.4722) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(gr_edit relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( gr_edit sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit .plotregion1.added_text[1].style.editstyle size(2.4) editcopy
gr_edit .plotregion1.added_text[1].text = {}
gr_edit .plotregion1.added_text[1].text.Arrpush {it:WHO target}

* Dragging text
gr_edit .plotregion1.added_text[1].DragBy .2389279723033106 -1

	graph save "Figures/wpop_forest_regiongdp statin `var' ${date}.gph", replace         
	graph export "Figures/wpop_forest_regiongdp statin `var' ${date}.pdf", as(pdf) name(wpop_forest_regdp_statin_`var') replace
	
	frame change default
	frame drop wpop_forestplot_regiongdp
}

/*******************************************************************************
WPOP FORESTPLOT - ISH SENSITIVITY
*******************************************************************************/

foreach var in ish {

* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", replace 

estimates restore wpop_pois_statin_`var' //  Restoring estimates	
estimates replay wpop_pois_statin_`var', baselevels // irr

putexcel (A1) = etable	// exporting the whole regression table

estimates restore wpop_margins_statin_`var' //  Restoring estimates	
estimates replay wpop_margins_statin_`var', baselevels

putexcel (H1) = etable	// exporting the whole regression table

frame create wpop_main_forestplot
frame change wpop_main_forestplot	
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/wpop Statin_forest_plot `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:N19) firstrow clear
	
* Renaming/dropping variables
	
	* Risk ratios
	drop StdErr
	drop t // t value
	rename Coef est_rr // IRR
	rename statin_`var' category
	rename F lci_rr
	rename G uci_rr
	rename Pt pvalue_rr

	* Margins
	rename dydx est_dydx
	drop H // repeat titles
	drop J // standard errors
	drop K // t value
	drop L // margins p-value (use from RR instead)
	rename M lci_dydx
	rename N uci_dydx
		
* Converting to % for margins
replace est_dydx = est_dydx*100
replace lci_dydx = lci_dydx*100
replace uci_dydx = uci_dydx*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Exponentiating
gen est_rr_exp = exp(est_rr)
gen lci_rr_exp = exp(lci_rr)
gen uci_rr_exp = exp(uci_rr)

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue_rr <0.0001
replace pvalue_s = "" if pvalue_rr == .

gen est_rr_s = string(est_rr_exp,"%9.2f")
gen lci_rr_s = string(lci_rr_exp,"%9.2f")
gen uci_rr_s = string(uci_rr_exp,"%9.2f")

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")

* Generating columns
gen column_rr = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace column_rr = "1 (ref)" if est_rr == 0 | est_rr == .
replace column_rr = "" if est_rr == .

gen column_dydx = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace column_dydx = "0 (ref)" if est_dydx == 0 | est_dydx == .
replace column_dydx = "" if est_dydx == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var column_rr `"`"{bf:Risk ratio               }"' `"{bf:in statin use}"'"'
label var column_dydx `"`"{bf:Absolute difference}"' `"{bf:in statin use (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est_rr lci_rr uci_rr, ///
	eform /// exponentiate
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(column_rr column_dydx pvalue_s) /// this tells forestplot which columsn to display
	range(0.25 4) /// range of the plot
		xlabel(0.25 0.5 1 2 4) /// plot labels
	astext(80) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.5) ///
	xtitle("Risk ratio", size(3.2rs)  margin(l=0 r=58rs b=2rs t=2rs)) ///
	name(wpop_main_forest_`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/wpop_main_forest_`var'_ ${date}.gph", replace         
	graph export "Figures/wpop_main_forest_`var'_ ${date}.pdf", as(pdf) name(wpop_main_forest_`var') replace
		
frame change default
frame drop wpop_main_forestplot
	
}
		
////////////////////////////////////////////////////////////////////////////////
//////////////// SENSITIVITY ANALYSIS USING EQUAL COUNTRY WEIGHTS //////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
ESTIMATIONS AND REGRESSION

This is the same exact same code as in the primary analysis, yet using equal 
weights by for each country.
******************************************************************************/

* Estimating proportions

	* Secondary prevention outcome using all weights
	foreach var of varlist samp_sec_statin statin_sec {

		* Sample weights
		gen sample = 1 if `var' != .

		* bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
		* gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

		bys country: egen sum_weq_sample=sum(w_all) if sample==1		
		gen weqnr_sample=w_all/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
		
		* Estimates overall output
		svy, subpop(sample): proportion `var'
		estimates store weq_ov_`var'
		matrix weq_ov_`var' = r(table)
		
		* Estimates by country
		svy, subpop(sample): proportion `var', over(country_encoded)
		estimates store weq_c_`var'
		matrix weq_c_`var' = r(table)
		
		* Estimates by country group
		svy, subpop(sample): proportion `var', over(WHOregionclass)
		estimates store weq_r_`var'
		matrix weq_r_`var' = r(table)
		
		svy, subpop(sample): proportion `var', over(countryGDPclass)
		estimates store weq_i_`var'
		matrix weq_i_`var' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
	}

	* Primary prevention outcome using fbg weights
	foreach var of varlist samp_pri_lab_statin statin_pri_lab {

		* Sample weights
		gen sample = 1 if `var' != .

		* bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		* gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
		
		* Estimates overall output
		svy, subpop(sample): proportion `var'
		estimates store weq_ov_`var'
		matrix weq_ov_`var' = r(table)
		
		* Estimates by country
		svy, subpop(sample): proportion `var', over(country_encoded)
		estimates store weq_c_`var'
		matrix weq_c_`var' = r(table)
		
		* Estimates by country group
		svy, subpop(sample): proportion `var', over(WHOregionclass)
		estimates store weq_r_`var'
		matrix weq_r_`var' = r(table)
		
		svy, subpop(sample): proportion `var', over(countryGDPclass)
		estimates store weq_i_`var'
		matrix weq_i_`var' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
	}

* Multivariable model

	* Secondary prevention outcome using all weights
	foreach var of varlist statin_pri_lab statin_sec {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

		* bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
		* gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

		bys country: egen sum_weq_sample=sum(w_all) if sample==1		
		gen weqnr_sample=w_all/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
		estimates store weq_pois_`var'
		matrix weq_pois_`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
		estimates store weq_margins_`var'
		matrix weq_margins_`var' = r(table)	
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
	}

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_pri_lab {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=. & rural !=.

		* bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		* gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.rural i.country_encoded if sample == 1, irr baselevels
		estimates store weq_pois_`var'
		matrix weq_pois_`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.rural i.sex i.educat3) vce(unconditional) post baselevels
		estimates store weq_margins_`var'
		matrix wweq_margins_`var' = r(table)	
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
	}

/*******************************************************************************
SEPARATE FORESTPLOT BY REGION, INCOME GROUP, AND OVERALL - WEQ SENSITIVITY
*******************************************************************************/	

foreach var in sec pri_lab {
	
* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/weq forestplot_regiongdp `var' ${date}.xlsx", replace 

putexcel A1 = "category"
putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"

putexcel A2 = "Region"
putexcel A3 = "Africa"
putexcel A4 = "Americas"
putexcel A5 = "South East Asia"
putexcel A6 = "Western Pacific"
putexcel A7 = "Europe"
putexcel A8 = "Eastern Mediterranean"

putexcel A10 = "Income group"
putexcel A11 = "Low income"
putexcel A12 = "Lower middle"
putexcel A13 = "Upper middle"

putexcel A15 = "Overall"

// Region
matlist weq_r_statin_`var'
matrix results = weq_r_statin_`var'

	* est
	putexcel B3 = results[1,7]
	putexcel B4 = results[1,8]
	putexcel B5 = results[1,9]
	putexcel B6 = results[1,10]
	putexcel B7 = results[1,11]
	putexcel B8 = results[1,12]
	putexcel B9 = results[1,13]

	* lci
	putexcel C3 = results[5,7]
	putexcel C4 = results[5,8]
	putexcel C5 = results[5,9]
	putexcel C6 = results[5,10]
	putexcel C7 = results[5,11]
	putexcel C8 = results[5,12]
	putexcel C9 = results[5,13]

	* uci
	putexcel D3 = results[6,7]
	putexcel D4 = results[6,8]
	putexcel D5 = results[6,9]
	putexcel D6 = results[6,10]
	putexcel D7 = results[6,11]
	putexcel D8 = results[6,12]
	putexcel D9 = results[6,13]

// GDP categories
matlist weq_i_statin_`var'
matrix results = weq_i_statin_`var'

	* est
	putexcel B11 = results[1,4]
	putexcel B12 = results[1,5]
	putexcel B13 = results[1,6]
	putexcel B14 = results[1,7]

	* lci
	putexcel C11 = results[5,4]
	putexcel C12 = results[5,5]
	putexcel C13 = results[5,6]
	putexcel C14 = results[5,7]

	* uci
	putexcel D11 = results[6,4]
	putexcel D12 = results[6,5]
	putexcel D13 = results[6,6]
	putexcel D14 = results[6,7]
	
// Overall
matlist weq_ov_statin_`var'
matrix results = weq_ov_statin_`var'

	* est
	putexcel B15 = results[1,2]
	
	* lci
	putexcel C15 = results[5,2]
	
	* uci
	putexcel D15 = results[6,2]
	
frame create weq_forestplot_regiongdp
frame change weq_forestplot_regiongdp	
	
import excel "Putexcel tables/weq forestplot_regiongdp `var' ${date}.xlsx", sheet("Sheet1") cellrange(A1:D15) firstrow clear
	
* Converting to %	
replace est = est*100
replace lci = lci*100
replace uci = uci*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"Income group","Region")
replace _USE = 6 if missing(category)
replace _USE = 5 if category == "Overall"

* Making string variables	
gen est_s = string(est,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "0 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0
replace category = `"{bf:Overall}"' if category == "Overall"

label var output `"`"{bf:Proportion using}"' `"{bf:statins (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of very helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force


forestplot est lci uci, ///
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output) /// this tells forestplot which columsn to display
	range(0 60) /// range of the plot
	xlabel(0 20 40 60) /// plot labels
	ylabel(,grid) ///
	astext(60) ///
	xsize(4) /// % text in the plot
	ysize(3) ///
	spacing(1.5) /// this refers to spacing by line
	boxscale(80)  ///
	boxopts(mcolor(black)) ///
	pointopts(msymbol(square)) ///
	diamopts(color(black)) ///
	olineopts(lcolor(none)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	null(0) ///
	xtitle("Statin use (%)", size(3.5rs)  margin(l=0 r=0 b=2rs t=2rs)) ///
	name(weq_forest_regdp_statin_`var',replace) //	
	
* Fixing some minor formatting issues
gr_edit .plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy

* Making the line
gr_edit .plotregion1.AddLine added_lines editor 50.57696737239915 .0341909628416472 50.57696737239915 14.75215405672569
gr_edit .plotregion1.added_lines_new = 1
gr_edit .plotregion1.added_lines_rec = 1
gr_edit .plotregion1.added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.1) allow_pct(1)) color(black) pattern(dash) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy

* Adding text
gr_edit .plotregion1.AddTextBox added_text editor 15.32558119025364 42.70517571788388
gr_edit .plotregion1.added_text_new = 1
gr_edit .plotregion1.added_text_rec = 1
gr_edit .plotregion1.added_text[1].style.editstyle  angle(default) size( sztype(relative) val(3.4722) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(gr_edit relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( gr_edit sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit .plotregion1.added_text[1].style.editstyle size(2.4) editcopy
gr_edit .plotregion1.added_text[1].text = {}
gr_edit .plotregion1.added_text[1].text.Arrpush {it:WHO target}

* Dragging text
gr_edit .plotregion1.added_text[1].DragBy .2389279723033106 -1

	graph save "Figures/weq_forest_regiongdp statin `var' ${date}.gph", replace         
	graph export "Figures/weq_forest_regiongdp statin `var' ${date}.pdf", as(pdf) name(weq_forest_regdp_statin_`var') replace
	
	frame change default
	frame drop weq_forestplot_regiongdp
}

/*******************************************************************************
FORESTPLOT - WEQ SENSITIVITY
*******************************************************************************/

foreach var in sec pri_lab {

* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/weq Statin_forest_plot `var' ${date}.xlsx", replace 

estimates restore weq_pois_statin_`var' //  Restoring estimates	
estimates replay weq_pois_statin_`var', baselevels // irr

putexcel (A1) = etable	// exporting the whole regression table

estimates restore weq_margins_statin_`var' //  Restoring estimates	
estimates replay weq_margins_statin_`var', baselevels

putexcel (H1) = etable	// exporting the whole regression table

frame create weq_main_forestplot
frame change weq_main_forestplot	
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/weq Statin_forest_plot `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:N19) firstrow clear
	
* Renaming/dropping variables
	
	* Risk ratios
	drop StdErr
	drop t // t value
	rename Coef est_rr // IRR
	rename statin_`var' category
	rename F lci_rr
	rename G uci_rr
	rename Pt pvalue_rr

	* Margins
	rename dydx est_dydx
	drop H // repeat titles
	drop J // standard errors
	drop K // t value
	drop L // margins p-value (use from RR instead)
	rename M lci_dydx
	rename N uci_dydx
		
* Converting to % for margins
replace est_dydx = est_dydx*100
replace lci_dydx = lci_dydx*100
replace uci_dydx = uci_dydx*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Exponentiating
gen est_rr_exp = exp(est_rr)
gen lci_rr_exp = exp(lci_rr)
gen uci_rr_exp = exp(uci_rr)

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue_rr <0.0001
replace pvalue_s = "" if pvalue_rr == .

gen est_rr_s = string(est_rr_exp,"%9.2f")
gen lci_rr_s = string(lci_rr_exp,"%9.2f")
gen uci_rr_s = string(uci_rr_exp,"%9.2f")

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")

* Generating columns
gen column_rr = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace column_rr = "1 (ref)" if est_rr == 0 | est_rr == .
replace column_rr = "" if est_rr == .

gen column_dydx = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace column_dydx = "0 (ref)" if est_dydx == 0 | est_dydx == .
replace column_dydx = "" if est_dydx == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var column_rr `"`"{bf:Risk ratio               }"' `"{bf:in statin use}"'"'
label var column_dydx `"`"{bf:Absolute difference}"' `"{bf:in statin use (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est_rr lci_rr uci_rr, ///
	eform /// exponentiate
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(column_rr column_dydx pvalue_s) /// this tells forestplot which columsn to display
	range(0.25 4) /// range of the plot
		xlabel(0.25 0.5 1 2 4) /// plot labels
	astext(80) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.5) ///
	xtitle("Risk ratio", size(3.2rs)  margin(l=0 r=58rs b=2rs t=2rs)) ///
	name(weq_main_forest_`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/weq_main_forest_`var'_ ${date}.gph", replace         
	graph export "Figures/weq_main_forest_`var'_ ${date}.pdf", as(pdf) name(weq_main_forest_`var') replace
		
frame change default
frame drop weq_main_forestplot
	
}

////////////////////////////////////////////////////////////////////////////////
//////////////// SENSITIVITY ANALYSES REMOVING RURAL VARIABLE / ////////////////
////////////////////////////////////////////////////////////////////////////////

* Multivariable model

	* Secondary prevention outcome using all weights
	foreach var of varlist statin_sec {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=.

		bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
		gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
		* gen weqnr_sample=w_all/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.country_encoded if sample == 1, irr baselevels
		estimates store wpop_pois_r`var'
		matrix wpop_pois_r`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.sex i.educat3) vce(unconditional) post baselevels
		estimates store wpop_marg_r`var'
		matrix wpop_marg_r`var' = r(table)	
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}

	* Primary prevention outcome using fbg weights
	foreach var of varlist statin_pri_lab {

		* Sample weights
		gen sample = 1 if `var' !=. & age !=. & sex !=. & educat3 !=.

		bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
		gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

		* bys country: egen sum_weq_sample=sum(weq_fbg) if sample==1		
		* gen weqnr_sample=weq_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
		
		* Regression
		svy: poisson `var' i.age_cat i.sex i.educat3 i.country_encoded if sample == 1, irr baselevels
		estimates store wpop_pois_r`var'
		matrix wpop_pois_r`var' = r(table)
			
		* Marginal effects
		margins, dydx(i.age_cat i.sex i.educat3) vce(unconditional) post baselevels
		estimates store wpop_marg_r`var'
		matrix wpop_marg_r`var' = r(table)	
		
		* Drop variables for loop
		drop sample wpopnr_sample sum_wpop_sample
	}


/*******************************************************************************
WPOP MARGINS FORESTPLOT - NORURAL SENSITIVITY
*******************************************************************************/

foreach var in sec pri_lab {
		
	* This step is clunky but gets all the estimates in an easy step
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
	putexcel set "Putexcel tables/wpop Statin_forest_plot rural `var' ${date}.xlsx", replace 

	estimates restore wpop_marg_rstatin_`var' //  Restoring estimates	
	estimates replay wpop_marg_rstatin_`var', baselevels

	putexcel (A1) = etable	// exporting the whole regression table

	frame create wpop_marg_forestplot
	frame change wpop_marg_forestplot	
	
	import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/wpop Statin_forest_plot rural `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:G15) firstrow clear
	
* Renaming/dropping variables
drop StdErr
rename dydx est
rename A category
rename F lci
rename G uci
rename Pt pvalue
	
* Converting to %	
replace est = est*100
replace lci = lci*100
replace uci = uci*100
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue <0.0001
replace pvalue_s = "" if pvalue == .

gen est_s = string(est,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "0 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var output `"`"{bf:Absolute difference}"' `"{bf:in statin use (%)}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est lci uci, ///
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output pvalue_s) /// this tells forestplot which columsn to display
	range(-10 30) /// range of the plot
		xlabel(-10 "-10%" 0 "0" 10 "10%" 20 "20%" 30 "30%") /// plot labels
	astext(60) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.75) ///
	xtitle("Average marginal effect", size(3.2rs)  margin(l=0 r=0 b=0 t=2rs)) ///
	name(wpop_forest_marg_statin_r`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/wpop_forest_margins rural statin `var' ${date}.gph", replace         
	graph export "Figures/wpop_forest_margins rural statin `var' ${date}.pdf", as(pdf) name(wpop_forest_marg_statin_r`var') replace
		
frame change default
frame drop wpop_marg_forestplot		

}


/*******************************************************************************
WPOP RR FORESTPLOT - NORURAL SENSITIVITY
*******************************************************************************/

foreach var in sec pri_lab {
		
	* This step is clunky but gets all the estimates in an easy step
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
	putexcel set "Putexcel tables/wpop poisson Statin_forest_plot rural `var' ${date}.xlsx", replace 

	estimates restore wpop_pois_rstatin_`var' //  Restoring estimates	
	estimates replay wpop_pois_rstatin_`var', baselevels // irr

	putexcel (A1) = etable	// exporting the whole regression table

	frame create wpop_pois_forestplot
	frame change wpop_pois_forestplot	
	
	import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables/wpop poisson Statin_forest_plot rural `var' ${date}.xlsx", sheet("Sheet1") cellrange(A2:G15) firstrow clear
	
* Renaming/dropping variables
drop StdErr
rename Coef est // IRR
rename statin_`var' category
rename F lci
rename G uci
rename Pt pvalue
	
* Converting to %	
replace est = est
replace lci = lci
replace uci = uci
	
* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(category,"age_cat","sex","educat3","rural")	
replace _USE = 6 if missing(category)

* Naming the categories
replace category = "Age" if category == "age_cat"
replace category = "Sex" if category == "sex"
replace category = "Education" if category == "educat3"
replace category = "Rural residence" if category == "rural"

* Making string variables	
gen pvalue_s = string(pvalue,"%4.3f")
replace pvalue_s = "<0.0001" if pvalue <0.0001
replace pvalue_s = "" if pvalue == .

gen est_exp = exp(est)
gen lci_exp = exp(lci)
gen uci_exp = exp(uci)

gen est_s = string(est_exp,"%9.2f")
gen lci_s = string(lci_exp,"%9.2f")
gen uci_s = string(uci_exp,"%9.2f")

gen output = est_s + " (" + lci_s + " to " + uci_s + ")"
replace output = "1 (ref)" if est == 0 | est == .
replace output = "" if est == .

* Realiigning
leftalign

* Adding bolded labels		
label var category `"`"{bf:Characteristic}"'"'
replace category = `"{bf:"' + category + `"}"' if _USE==0

label var pvalue_s `"`"{bf:P value}"'"'
label var output `"`"{bf:Risk ratio               }"' `"{bf:in statin use}"'"'

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* recast str21 category, force
	
forestplot est lci uci, ///
	eform /// exponentiate
	labels(category) ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output pvalue_s) /// this tells forestplot which columsn to display
	range(0.125 8) /// range of the plot
		xlabel(0.125 0.25 0.5 1 2 4 8) /// plot labels
	astext(60) /// % text in the plot
	spacing(1.3) /// this refers to spacing by line
	boxopts(mcolor(none)) ///
	pointopts(msymbol(square)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) ///
	ysize(2.75) ///
	xtitle("Risk ratio", size(3.2rs)  margin(l=0 r=0 b=0 t=2rs)) ///
	name(wpop_pois_forest_r_`var',replace) //
	
* Fixing some minor formatting issues	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

	graph save "Figures/wpop_pois_forest rural statin `var' ${date}.gph", replace         
	graph export "Figures/wpop_pois_forest rural statin `var' ${date}.pdf", as(pdf) name(wpop_pois_forest_r_`var') replace
		
frame change default
frame drop wpop_pois_forestplot		

}
			

////////////////////////////////////////////////////////////////////////////////
/////////////////////////////// APPENDIX TABLES ////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
Appendix table - Statin use among primary prevention sample stratified by CVD risk
*******************************************************************************/	
	
gen prim_prev_lab_cat =.
replace prim_prev_lab_cat = 3 if who_10yr_lab > 0.2 & who_10yr_lab <1 & inrange(age,40,69) & pregnant != 1 & mi != .
replace prim_prev_lab_cat = 2 if who_10yr_lab >= 0.1 & who_10yr_lab <= 0.2 & inrange(age,40,69) & pregnant != 1 & mi != .
replace prim_prev_lab_cat = 1 if who_10yr_lab < 0.1 & inrange(age,40,69) & pregnant != 1 & mi != .

tab prim_prev_lab_cat, m
tab prim_prev_lab_cat

tab country prim_prev_lab_cat, m
tab country prim_prev_lab_cat

foreach var of varlist statin {

	* Sample weights
	gen sample = 1 if `var' != .

	bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
	gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

	* bys country: egen sum_weq_sample_reg=sum(w_all) if sample==1		
	* gen weqnr_sample=w_all/sum_weq_sample
	
	* Set svyset
	svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
	
	* Estimates overall output
	svy, subpop(sample): proportion `var', over(prim_prev_lab_cat)
	estimates store ov_`var'_prim_prev
	matrix ov_`var'_prim_prev = r(table)
	
	* Estimates by country
	svy, subpop(sample): proportion `var', over(prim_prev_lab_cat country_encoded)
	estimates store c_`var'__prim_prev
	matrix c_`var'__prim_prev = r(table)
	
	* Estimates by country group
	svy, subpop(sample): proportion `var', over(prim_prev_lab_cat WHOregionclass)
	estimates store region_`var'_prim_prev
	matrix region_`var'_prim_prev = r(table)
	
	svy, subpop(sample): proportion `var', over(prim_prev_lab_cat countryGDPclass)
	estimates store income_`var'_prim_prev
	matrix income_`var'_prim_prev = r(table)
	
	* Drop variables for loop
	drop sample wpopnr_sample sum_wpop_sample
}

* This step is clunky but gets all the estimates in an easy step
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper"
putexcel set "Putexcel tables/Appendix table - Statin use among primary prevention sample stratified by CVD risk ${date}.xlsx", replace 

* Make the table frame

putexcel B1 = "CVD risk category"
putexcel C1 = "Proportion (95% CI) using statin for primary prevention of CVD"

putexcel A2 = "Region"
putexcel A3 = "Africa"
putexcel A7 = "Americas"
putexcel A11 = "South East Asia"
putexcel A15 = "Western Pacific"
putexcel A19 = "Europe"
putexcel A23 = "Eastern Mediterranean"
putexcel A27 = "Income group"
putexcel A28 = "Low income"
putexcel A32 = "Lower middle income"
putexcel A36 = "Upper middle income"
putexcel A40 = "Overall"

putexcel B3 = "<10%"
putexcel B7 = "<10%"
putexcel B11 = "<10%"
putexcel B15 = "<10%"
putexcel B19 = "<10%"
putexcel B23 = "<10%"
putexcel B28 = "<10%"
putexcel B32 = "<10%"
putexcel B36 = "<10%"
putexcel B40 = "<10%"

putexcel B4 = "10-20%"
putexcel B8 = "10-20%"
putexcel B12 = "10-20%"
putexcel B16 = "10-20%"
putexcel B20 = "10-20%"
putexcel B24 = "10-20%"
putexcel B29 = "10-20%"
putexcel B33 = "10-20%"
putexcel B37 = "10-20%"
putexcel B41 = "10-20%"


putexcel B5 = ">20%"
putexcel B9 = ">20%"
putexcel B13 = ">20%"
putexcel B17 = ">20%"
putexcel B21 = ">20%"
putexcel B25 = ">20%"
putexcel B30 = ">20%"
putexcel B34 = ">20%"
putexcel B38 = ">20%"
putexcel B42 = ">20%"

* Region

	matlist region_statin_prim_prev
	matrix results = region_statin_prim_prev

	* <10% risk rows
	forvalues i = 1(1)6 {
		
		local mat_column = `i' + 18
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 3+4*(`i'-1)
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

	* 10-20% risk rows
	forvalues i = 1(1)6 {
		
		local mat_column = `i' + 24 // increased by 6
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 4+4*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

	* >20% risk rows
	forvalues i = 1(1)6 {
		
		local mat_column = `i' + 30 // increased by 6
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 5+4*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

* Income

	matlist income_statin_prim_prev
	matrix results = income_statin_prim_prev

	* <10% risk rows
	forvalues i = 1(1)3 {
		
		local mat_column = `i' + 9 // increased by 3
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 28+4*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}
	
	* 10-20% risk rows
	forvalues i = 1(1)3 {
		
		local mat_column = `i' + 12 // increased by 3
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 29+4*(`i'-1)
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}
	
	* >20% risk rows
	forvalues i = 1(1)3 {
		 // increased by one
		local mat_column = `i' + 15 // increased by 3
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 30+4*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

* Overall

	matlist ov_statin_prim_prev
	matrix results = ov_statin_prim_prev

	* <10% risk rows
	forvalues i = 1(1)1 {
		
		local mat_column = `i' + 3
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 40+1*(`i'-1)
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

	* 10-20% risk rows
	forvalues i = 1(1)1 {
		
		local mat_column = `i' + 4 // increased by one
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 41+1*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}

	* >20% risk rows
	forvalues i = 1(1)1 {
		
		local mat_column = `i' + 5 // increased by one
		
		local b = string(results[1,`mat_column']*100,"%9.1f")
		local lci = string(results[5,`mat_column']*100,"%9.1f")
		local uci = string(results[6,`mat_column']*100,"%9.1f")
		local excel_row = 42+1*(`i'-1) // increased by one
		putexcel C`excel_row' = "`b' (`lci'-`uci')"
	}
	drop prim_prev_lab_cat

/*******************************************************************************
Appendix table - Additional details on study sample by country
*******************************************************************************/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
putexcel set "Putexcel tables/Appendix table - Additional details on study sample by country ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Secondary prevention sample, n"
putexcel C1 = "Secondary prevention sample, weighted %"
putexcel D1 = "Primary prevention sample, n"
putexcel E1 = "Primary prevention sample, weighted %"
putexcel F1 = "Median (IQR) CVD risk among primary prevention sample"

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
	sort country_id country 

* "Country"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = country[`n']
	}

* "Secondary prevention sample, n"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	count if samp_sec_statin == 1 & country_encoded == `n'
	local sample = r(N)
	putexcel B`row' = `sample'
	}
	putexcel B2:B`=scalar(bottom_row)',nformat(number_sep)

* Secondary prevention sample, weighted %
	matlist wpop_c_samp_sec_statin
	matrix results = wpop_c_samp_sec_statin
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
		
	local country_start = `n'+`=scalar(n_countries)'
	di `country_start'
	
	local b = string(results[1,`country_start']*100,"%9.1f")
	di `b'
	local lci = string(results[5,`country_start']*100,"%9.1f")
	local uci = string(results[6,`country_start']*100,"%9.1f")
		
	putexcel C`row' = "`b' (`lci'-`uci')"
	}
	
* "Primary prevention sample (2019 risk equations)"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	count if samp_pri_lab_statin == 1 & country_encoded == `n'
	local sample = r(N)
	putexcel D`row' = `sample'
	}
	putexcel D2:D`=scalar(bottom_row)',nformat(number_sep)
	
* Primary prevention sample, weighted %
	matlist wpop_c_samp_pri_lab_statin
	matrix results = wpop_c_samp_pri_lab_statin
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
		
	local country_start = `n'+`=scalar(n_countries)'
	di `country_start'
	
	local b = string(results[1,`country_start']*100,"%9.1f")
	di `b'
	local lci = string(results[5,`country_start']*100,"%9.1f")
	local uci = string(results[6,`country_start']*100,"%9.1f")
		
	putexcel E`row' = "`b' (`lci'-`uci')"
	}


* "Median (IQR) CVD risk"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		sum who_10yr_lab if samp_pri_lab_statin == 1 & country_encoded == `n', detail
		
		local min1 = r(p25)*100
		di `min1'
		local min2 = string(`min1',"%5.1f")
		di `min2'
		
		local med1 = r(p50)*100
		di `med1'
		local med2 = string(`med1',"%5.1f")
		di `med2'
		
		local max1 = r(p75)*100
		di `max1'
		local max2 = string(`max1',"%5.1f")
		di `max2'
		
		putexcel F`row' = "`med2' (`min2'-`max2')"
	}
	putexcel F2:F`=scalar(bottom_row)',nformat(number_sep)
	
	* Overall rows
		
		putexcel A`=scalar(bottom_row)' = "Overall*"
		
		* "Secondary prevention sample, n"
			count if samp_sec_statin == 1
			local sample = r(N)
			putexcel B`=scalar(bottom_row)' = `sample'
			
		* Secondary prevention sample, weighted %
			matlist wpop_ov_samp_sec_statin
			matrix results = wpop_ov_samp_sec_statin
			
			local b = string(results[1,2]*100,"%9.1f")
			di `b'
			local lci = string(results[5,2]*100,"%9.1f")
			local uci = string(results[6,2]*100,"%9.1f")
				
			putexcel C`=scalar(bottom_row)' = "`b' (`lci'-`uci')"
			
		* "Primary prevention sample (2019 risk equations)"
			count if samp_pri_lab_statin == 1
			local sample = r(N)
			putexcel D`=scalar(bottom_row)' = `sample'
						
		* Primary prevention sample, weighted %
			matlist wpop_ov_samp_pri_lab_statin
			matrix results = wpop_ov_samp_pri_lab_statin
			
			local b = string(results[1,2]*100,"%9.1f")
			di `b'
			local lci = string(results[5,2]*100,"%9.1f")
			local uci = string(results[6,2]*100,"%9.1f")
				
			putexcel E`=scalar(bottom_row)' = "`b' (`lci'-`uci')"
						
		* "Median (IQR) CVD risk"
			sum who_10yr_lab if samp_pri_lab_statin == 1, detail
				
			local min1 = r(p25)*100
			di `min1'
			local min2 = string(`min1',"%5.1f")
			di `min2'
			
			local med1 = r(p50)*100
			di `med1'
			local med2 = string(`med1',"%5.1f")
			di `med2'
			
			local max1 = r(p75)*100
			di `max1'
			local max2 = string(`max1',"%5.1f")
			di `max2'
			
			putexcel F`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"

	
/*******************************************************************************
Appendix table - Details on missingness/missing data by country
*******************************************************************************/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
putexcel set "Putexcel tables/Appendix table - Details on missing data by country ${date}.xlsx", replace 

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Missing data on prior self-reported CVD (%)"
putexcel C1 = "Missing data on statin use (%)"
putexcel D1 = "Missing data to generate risk scores among primary prevention sample (%)"
putexcel A`=scalar(bottom_row)' = "Overall"

* Sorting the dataset
	sort country_id country 

* "Country"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = country[`n']
	}

* "Missing data on prior MI (%)"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	
	count if sample_all == 1 & mi == . & country_encoded == `n' & w_all != . & w_all != 0
	local missing = r(N)
	
	count if sample_all == 1 & country_encoded == `n' & w_all != . & w_all != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}	
		
	putexcel B`row' = "`missing_percent_string'"
	
	
	}	
	
* "Missing data on statin use (%)"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	
	count if sample_all == 1 & statin == . & country_encoded == `n' & w_all != . & w_all != 0
	local missing = r(N)
	
	count if sample_all == 1 & country_encoded == `n' & w_all != . & w_all != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}
	
	putexcel C`row' = "`missing_percent_string'"
	}
	
* "Missing lab scores, among population primary prevention population (%)"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	
	count if sample_all == 1 & mi == 0 & country_encoded == `n' & risk_above_20_lab == . & w_fbg != . & w_fbg != 0
	local missing = r(N)
	
	count if sample_all == 1 & mi == 0 & country_encoded == `n' & w_fbg != . & w_fbg != 0
	local total  = r(N)
		
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}
	
	putexcel D`row' = "`missing_percent_string'"
	}
	
* Overall
		
	* "Missing data on prior MI (%)"
	count if sample_all == 1 & mi == . & w_all != . & w_all != 0
	local missing = r(N)
	
	count if sample_all == 1 & w_all != . & w_all != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}
	
	putexcel B`=scalar(bottom_row)' = "`missing_percent_string'"
	
	
	* "Missing data on statin use (%)"
	count if sample_all == 1 & statin == . & w_all != . & w_all != 0
	local missing = r(N)
	
	count if sample_all == 1 & w_all != . & w_all != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}
	
	putexcel C`=scalar(bottom_row)' = "`missing_percent_string'"
		
		
	* "Missing lab scores, among population primary prevention population (%)"
	count if sample_all == 1 & mi == 0 & risk_above_20_lab == . & w_fbg != . & w_fbg != 0
	local missing = r(N)
	
	count if sample_all == 1 & mi == 0 & w_fbg != . & w_fbg != 0
	local total  = r(N)
		
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	if `missing_percent' <0.1 {
		local missing_percent_string = "<0.1" 
	}
				
	putexcel D`=scalar(bottom_row)' = "`missing_percent_string'"
		
/*******************************************************************************
Appendix table - Country data used in analyses
*******************************************************************************/	
	
	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
	putexcel set "Putexcel tables/Appendix table - Country data used in analyses ${date}.xlsx", replace 
			   
	* Make the table frame
	putexcel A1 = "Country"
	putexcel B1 = "Per capita income (2017 constant international $)"
	putexcel C1 = "Health spending per capita (current international $)"
	putexcel D1 = "DALYS attributable to ischemic heart disease and stroke (per 100,000)"
	putexcel E1 = "NCD policy implementation score, 2017 (%)"
	putexcel F1 = "2019 population age 30-69 years (thousands)"	
		
	* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

	* Sorting the dataset
	sort country_id country 	
		
	* "Country"
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel A`row' = country[`n']
		}
				
	* "Per capita income (2017 constant international $)"
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel B`row' = gni[`n']
		}
		
		* Generating median (IQR) in bottom row
			sum gni if country_id == 1, d
		
			local min1 = r(p25)
			di `min1'
			local min2 = string(`min1',"%9.0fc")
			di `min2'
			
			local med1 = r(p50)
			di `med1'
			local med2 = string(`med1',"%9.0fc")
			di `med2'
			
			local max1 = r(p75)
			di `max1'
			local max2 = string(`max1',"%9.0fc")
			di `max2'
			
			putexcel B`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"		
		
	* "Health spending per capita (current international $)"
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel C`row' = health_spend[`n']
		}
		
		* Generating median (IQR) in bottom row
			sum health_spend if country_id == 1, d
		
			local min1 = r(p25)
			di `min1'
			local min2 = string(`min1',"%9.0fc")
			di `min2'
			
			local med1 = r(p50)
			di `med1'
			local med2 = string(`med1',"%9.0fc")
			di `med2'
			
			local max1 = r(p75)
			di `max1'
			local max2 = string(`max1',"%9.0fc")
			di `max2'
			
			putexcel C`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"	

	* "DALYS attributable to ischemic heart disease and stroke (per 100,000)"
	* "NCD policy implementation score, 2017 (%)"
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel D`row' = dalys[`n']
		}
				
		* Generating median (IQR) in bottom row
			sum dalys if country_id == 1, d
		
			local min1 = r(p25)
			di `min1'
			local min2 = string(`min1',"%9.0fc")
			di `min2'
			
			local med1 = r(p50)
			di `med1'
			local med2 = string(`med1',"%9.0fc")
			di `med2'
			
			local max1 = r(p75)
			di `max1'
			local max2 = string(`max1',"%9.0fc")
			di `max2'
			
			putexcel D`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"	
			
	* "NCD policy implementation score, 2017 (%)"
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel E`row' = policy_score[`n']
		}
		putexcel E2:E`=scalar(bottom_row)',nformat(number_sep)
		
		* Generating median (IQR) in bottom row
			sum policy_score if country_id == 1, d
		
			local min1 = r(p25)
			di `min1'
			local min2 = string(`min1',"%9.0fc")
			di `min2'
			
			local med1 = r(p50)
			di `med1'
			local med2 = string(`med1',"%9.0fc")
			di `med2'
			
			local max1 = r(p75)
			di `max1'
			local max2 = string(`max1',"%9.0fc")
			di `max2'
			
			putexcel E`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"	
		
	* "2019 population age 30-69 years (thousands)"
		gen population_2019_40_69_thous = population_2019_40_69/1000
		
		forval n = 1/`=scalar(n_countries)' {
			local row = `n'+1	
			putexcel F`row' = population_2019_40_69_thous[`n']
		}
		putexcel A2:F`=scalar(bottom_row)',nformat(number_sep)
				
		* Generating median (IQR) in bottom row
			sum population_2019_40_69_thous if country_id == 1, d
		
			local min1 = r(p25)
			di `min1'
			local min2 = string(`min1',"%9.0fc")
			di `min2'
			
			local med1 = r(p50)
			di `med1'
			local med2 = string(`med1',"%9.0fc")
			di `med2'
			
			local max1 = r(p75)
			di `max1'
			local max2 = string(`max1',"%9.0fc")
			di `max2'
			
			putexcel F`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"
			
		drop population_2019_40_69_thous
	
/*******************************************************************************
Appendix table - Proportion of statin use by country
*******************************************************************************/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
putexcel set "Putexcel tables/Appendix table - Proportion of statin use by country ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Proportion (95% CI) using statins for secondary prevention"
putexcel C1 = "Proportion (95% CI) using statins for primary prevention (2019 WHO risk scores)"
putexcel D1 = "Proportion (95% CI) using statins for primary prevention (2007 WHO/ISH risk scores)"


* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
	sort country_id country 

* "Country"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = country[`n']
	}


* "Proportion (95% CI) using statins for secondary prevention"
	matlist wpop_c_statin_sec
	matrix results = wpop_c_statin_sec
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
		
	local country_start = `n'+`=scalar(n_countries)'
	di `country_start'
	
	local b = string(results[1,`country_start']*100,"%9.1f")
	di `b'
	local lci = string(results[5,`country_start']*100,"%9.1f")
	local uci = string(results[6,`country_start']*100,"%9.1f")
		
	putexcel B`row' = "`b' (`lci'-`uci')"
	}

* "Proportion (95% CI) using statins for primary prevention"
	matlist wpop_c_statin_pri_lab
	matrix results = wpop_c_statin_pri_lab
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
		
	local country_start = `n'+`=scalar(n_countries)'
	di `country_start'
	
	local b = string(results[1,`country_start']*100,"%9.1f")
	di `b'
	local lci = string(results[5,`country_start']*100,"%9.1f")
	local uci = string(results[6,`country_start']*100,"%9.1f")
		
		if results[1,`country_start'] == 0 { //`b' == "0.0" {
			putexcel C`row' = "0"
		}
		
		else {
			putexcel C`row' = "`b' (`lci'-`uci')"
		}
	}
	
* "Proportion (95% CI) using statins for primary prevention (sensitivity analysis with WHO/ISH 2007 risk scores)"
	matlist wpop_c_statin_ish
	matrix results = wpop_c_statin_ish
	
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
		
	local country_start = `n'+`=scalar(n_countries)'
	di `country_start'
	
	local b = string(results[1,`country_start']*100,"%9.1f")
	di `b'
	local lci = string(results[5,`country_start']*100,"%9.1f")
	local uci = string(results[6,`country_start']*100,"%9.1f")
		
		if results[1,`country_start'] == 0 { //`b' == "0.0" {
			putexcel D`row' = "0"
		}
		
		else {
			putexcel D`row' = "`b' (`lci'-`uci')"
		}
	}
		
/*******************************************************************************
Appendix table - Sample characteristics by covariates
*******************************************************************************/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/"
putexcel set "Putexcel tables/Appendix table sample characteristics ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Characteristic"
putexcel A2 = "Age"
putexcel A3 = "<50 years"
putexcel A4 = "50-59 years"
putexcel A5 = "60-69 years"
putexcel A6 = "Sex"
putexcel A7 = "Male"
putexcel A8 = "Female"
putexcel A9 = "Education"
putexcel A10 = "No schooling"
putexcel A11 = "Primary education"
putexcel A12 = "Secondary or above"
putexcel A13 = "Rural vs. urban residence"
putexcel A14 = "Urban"
putexcel A15 = "Rural"

putexcel B1 = "Total sample"
putexcel D1 = "Secondary prevention sample"
putexcel F1 = "Primary prevention sample"

putexcel B2 = "n"
putexcel D2 = "n"
putexcel F2 = "n"

putexcel C2 = "Weighted %"
putexcel E2 = "Weighted %"
putexcel G2 = "Weighted %"

putexcel C16 = 100
putexcel E16 = 100
putexcel G16 = 100


* Macro for bottom row
scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
sort country_id country 

* n  ---------------------------------------------------------------------------

	* age_cat

		tab age_cat if sample_all == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B3 = matrix_sample_table[1,1]
		putexcel B4 = matrix_sample_table[2,1]
		putexcel B5 = matrix_sample_table[3,1]
		
		tab age_cat if samp_sec_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D3 = matrix_sample_table[1,1]
		putexcel D4 = matrix_sample_table[2,1]
		putexcel D5 = matrix_sample_table[3,1]
		
		tab age_cat if samp_pri_lab_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F3 = matrix_sample_table[1,1]
		putexcel F4 = matrix_sample_table[2,1]
		putexcel F5 = matrix_sample_table[3,1]
	
	* sex

		tab sex if sample_all == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B7 = matrix_sample_table[1,1]
		putexcel B8 = matrix_sample_table[2,1]
				
		tab sex if samp_sec_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D7 = matrix_sample_table[1,1]
		putexcel D8 = matrix_sample_table[2,1]
				
		tab sex if  samp_pri_lab_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F7 = matrix_sample_table[1,1]
		putexcel F8 = matrix_sample_table[2,1]
				
	* educat3

		tab educat3 if sample_all == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B10 = matrix_sample_table[1,1]
		putexcel B11 = matrix_sample_table[2,1]
		putexcel B12 = matrix_sample_table[3,1]
		
		tab educat3 if samp_sec_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D10 = matrix_sample_table[1,1]
		putexcel D11 = matrix_sample_table[2,1]
		putexcel D12 = matrix_sample_table[3,1]
		
		tab educat3 if  samp_pri_lab_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F10 = matrix_sample_table[1,1]
		putexcel F11 = matrix_sample_table[2,1]
		putexcel F12 = matrix_sample_table[3,1]
	
	* rural

		tab rural if sample_all == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B14 = matrix_sample_table[1,1]
		putexcel B15 = matrix_sample_table[2,1]
				
		tab rural if samp_sec_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D14 = matrix_sample_table[1,1]
		putexcel D15 = matrix_sample_table[2,1]
				
		tab rural if  samp_pri_lab_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F14 = matrix_sample_table[1,1]
		putexcel F15 = matrix_sample_table[2,1]
		
	* total
	
		tab sample_all if sample_all == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B16 = matrix_sample_table[1,1]
		
		tab samp_sec_statin if samp_sec_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D16 = matrix_sample_table[1,1]
		
		tab samp_pri_lab_statin if samp_pri_lab_statin == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F16 = matrix_sample_table[1,1]
		
	* Formatting	
	putexcel B2:B16,nformat(number_sep)
	putexcel D2:D16,nformat(number_sep)
	putexcel F2:F16,nformat(number_sep)
	
* Weighted for total and secondary CVD sample ----------------------------------

	* age_cat
	
		* sample_all
		foreach var of varlist sample_all  {

			* Sample weights
			gen sample = 1 if `var' == 1 
		
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel C5 = "`b' (`lci'-`uci')"
			
		* samp_sec_statin
		foreach var of varlist samp_sec_statin  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 
		
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel E5 = "`b' (`lci'-`uci')"
			
		* samp_pri_lab_statin
		foreach var of varlist samp_pri_lab_statin  {

		
		
			* Sample weights
			gen sample = 1 if `var' == 1 
						
			bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel G5 = "`b' (`lci'-`uci')"

	* sex
		
		* sample_all
		foreach var of varlist sample_all  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C7 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C8 = "`b' (`lci'-`uci')"
				
		* samp_sec_statin
		foreach var of varlist samp_sec_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E7 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E8 = "`b' (`lci'-`uci')"
					
		* samp_pri_lab_statin
		foreach var of varlist samp_pri_lab_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G7 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G8 = "`b' (`lci'-`uci')"
				
	* educat3

		* sample_all
		foreach var of varlist sample_all  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C10 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C11 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel C12 = "`b' (`lci'-`uci')"
			
		* samp_sec_statin
		foreach var of varlist samp_sec_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E10 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E11 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel E12 = "`b' (`lci'-`uci')"
			
		* samp_pri_lab_statin
		foreach var of varlist samp_pri_lab_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G10 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G11 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel G12 = "`b' (`lci'-`uci')"

	* rural

		* sample_all
		foreach var of varlist sample_all  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C14 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C15 = "`b' (`lci'-`uci')"
				
		* samp_sec_statin
		foreach var of varlist samp_sec_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(w_all) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_all*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E14 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E15 = "`b' (`lci'-`uci')"
					
		* samp_pri_lab_statin
		foreach var of varlist samp_pri_lab_statin  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			bys country: egen sum_wpop_sample=sum(wpop_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=wpop_fbg*population_2019_40_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_all) if sample==1		
			* gen weqnr_sample=w_all/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = wpopnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop wpopnr_sample sum_wpop_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G14 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G15 = "`b' (`lci'-`uci')"
		
	* Formatting	
	putexcel B2:B16,nformat(number_sep)
	putexcel D2:D16,nformat(number_sep)
	putexcel F2:F16,nformat(number_sep)
				
/*******************************************************************************
Appendix table - RR and dydx for statin secondary prevention from within-country models
*******************************************************************************/

* Secondary prevention ------------------------------------

	sort country_encoded

	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Putexcel tables"
	putexcel set "Appendix table - country covariate plot statin_sec ${date}.xlsx", replace 
		
	* Make the table frame
	putexcel A2 = "Country"
	
	putexcel B2 = "Relative risk (95% CI)"
	putexcel D2 = "Relative risk (95% CI)"
	putexcel F2 = "Relative risk (95% CI)"
	putexcel H2 = "Relative risk (95% CI)"
	
	putexcel C2 = "Absolute change, % (95% CI)"
	putexcel E2 = "Absolute change, % (95% CI)"
	putexcel G2 = "Absolute change, % (95% CI)"
	putexcel I2 = "Absolute change, % (95% CI)"
	
	putexcel B1 = "Female vs. male sex (reference category)"
	putexcel D1 = "≥ 55 years vs. ≤ 55 years (reference category)"
	putexcel F1 = "≥ Secondary education vs ≤ primary school (reference category)"
	putexcel H1 = "Rural vs. urban residence (reference category)"
	
					   
	* Column: Country
	sort country_id country_encoded
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+2
	putexcel A`row' = country[`n']
	}		   
	
	* RR------------------------------------------------------------------------
	
		* Column: rr_statin_sec_sex

			foreach var in rr_statin_sec_sex {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2],"%9.2f")
					local lci = string(results[5,2],"%9.2f")
					local uci = string(results[6,2],"%9.2f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel B`row' = "N/A"
					}
					
					else {
						putexcel B`row' = "`b' (`lci'-`uci')"
					}	
				}
			}
			
			* Column: rr_statin_sec_age_cat2

			foreach var in rr_statin_sec_age_cat2 {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2],"%9.2f")
					local lci = string(results[5,2],"%9.2f")
					local uci = string(results[6,2],"%9.2f")
						
					if results[1,1] == . { //`b' == "0.0" {
						putexcel D`row' = "N/A"
					}
					
					else {
						putexcel D`row' = "`b' (`lci'-`uci')"
					}	
				}
			}
			
				* Column: rr_statin_sec_edbin

			foreach var in rr_statin_sec_edbin {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2],"%9.2f")
					local lci = string(results[5,2],"%9.2f")
					local uci = string(results[6,2],"%9.2f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel F`row' = "N/A"
					}
					
					else {
						putexcel F`row' = "`b' (`lci'-`uci')"
					}	
				}
			}
			
			* Column: rr_statin_sec_rural

			foreach var in rr_statin_sec_rural {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2],"%9.2f")
					local lci = string(results[5,2],"%9.2f")
					local uci = string(results[6,2],"%9.2f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel H`row' = "N/A"
					}
					
					else {
						putexcel H`row' = "`b' (`lci'-`uci')"
					}	
				}
			}
		
	* dydx----------------------------------------------------------------------
	
		* Column: dydx_statin_sec_sex

			foreach var in dydx_statin_sec_sex {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2]*100,"%9.1f")
					local lci = string(results[5,2]*100,"%9.1f")
					local uci = string(results[6,2]*100,"%9.1f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel C`row' = "N/A"
					}
					
					else {
						putexcel C`row' = "`b' (`lci' to `uci')"
					}	
				}
			}
			
		* Column: dydx_statin_sec_age_cat2

			foreach var in dydx_statin_sec_age_cat2 {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2]*100,"%9.1f")
					local lci = string(results[5,2]*100,"%9.1f")
					local uci = string(results[6,2]*100,"%9.1f")
						
					if results[1,1] == . { //`b' == "0.0" {
						putexcel E`row' = "N/A"
					}
					
					else {
						putexcel E`row' = "`b' (`lci' to `uci')"
					}	
				}
			}
			
		* Column: dydx_statin_sec_edbin

			foreach var in dydx_statin_sec_edbin {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2]*100,"%9.1f")
					local lci = string(results[5,2]*100,"%9.1f")
					local uci = string(results[6,2]*100,"%9.1f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel G`row' = "N/A"
					}
					
					else {
						putexcel G`row' = "`b' (`lci' to `uci')"
					}	
				}
			}
			
		* Column: dydx_statin_sec_rural

			foreach var in dydx_statin_sec_rural {
				
				forval n = 1/`=scalar(n_countries)' {
					
					local row = `n'+2	
						
					local country_start = `n'+`=scalar(n_countries)'
					
					matlist c_`var'_`n'
					matrix results = c_`var'_`n'
					
					local b = string(results[1,2]*100,"%9.1f")
					local lci = string(results[5,2]*100,"%9.1f")
					local uci = string(results[6,2]*100,"%9.1f")
						
					if results[1,2] == . { //`b' == "0.0" {
						putexcel I`row' = "N/A"
					}
					
					else {
						putexcel I`row' = "`b' (`lci' to `uci')"
					}	
				}
			}		
